ProtobufAnnotationSerializer.java example

Explorer
Stanford-NLP-master
- CoreNLP-master
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ie.NumberNormalizer;
import edu.stanford.nlp.ie.machinereading.structure.EntityMention;
import edu.stanford.nlp.ie.machinereading.structure.ExtractionObject;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations.*;
import edu.stanford.nlp.ie.machinereading.structure.RelationMention;
import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.ie.util.RelationTriple;
import edu.stanford.nlp.international.Language;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.SegmenterCoreAnnotations;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.naturalli.*;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.time.Timex;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.LabeledScoredTreeNode;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.*;
import edu.stanford.nlp.ling.CoreAnnotations.*;
import edu.stanford.nlp.trees.TreeCoreAnnotations.*;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.*;
import edu.stanford.nlp.time.TimeAnnotations.*;

import java.io.*;
import java.util.*;
import java.util.stream.Collectors;

import edu.stanford.nlp.coref.CorefCoreAnnotations.*;

import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.coref.data.Dictionaries;
import edu.stanford.nlp.coref.data.Mention;
import edu.stanford.nlp.coref.data.SpeakerInfo;

/**
 * <p>
 *   A serializer using Google's protocol buffer format.
 *   The files produced by this serializer, in addition to being language-independent,
 *   are a little over 10% the size and 4x faster to read+write versus the default Java serialization
 *   (see GenericAnnotationSerializer), when both files are compressed with gzip.
 * </p>
 *
 * <p>
 *   Note that this handles only a subset of the possible annotations
 *   that can be attached to a sentence. Nonetheless, it is guaranteed to be
 *   lossless with the default set of named annotators you can create from a
 *   {@link StanfordCoreNLP} pipeline, with default properties defined for each annotator.
 *   Note that the serializer does not gzip automatically -- this must be done manually, by passing in a
 *   GZIPOutputStream when writing and a GZIPInputStream when reading. For most Annotations, gzipping provides a
 *   notable decrease in size (~2.5x) due to most of the data being raw Strings.
 * </p>
 *
 * <p>
 *   To allow lossy serialization, use {@link ProtobufAnnotationSerializer#ProtobufAnnotationSerializer(boolean)}.
 *   Otherwise, an exception is thrown if an unknown key appears in the annotation which would not be saved to the
 *   protocol buffer.
 *   If such keys exist, and are a part of the standard CoreNLP pipeline, please let us know!
 *   If you would like to serialize keys in addition to those serialized by default (e.g., you are attaching
 *   your own annotations), then you should do the following:
 * </p>
 *
 * <ol>
 *   <li>
 *     Create a .proto file which extends one or more of Document, Sentence, or Token. Each of these have fields
 *     100-255 left open for user extensions. An example of such an extension is:
 *     <pre>
 *       package edu.stanford.nlp.pipeline;
 *
 *       option java_package = "com.example.my.awesome.nlp.app";
 *       option java_outer_classname = "MyAppProtos";
 *
 *       import "CoreNLP.proto";
 *
 *       extend Sentence {
 *         optional uint32 myNewField    = 101;
 *       }
 *     </pre>
 *   </li>
 *
 *   <li>
 *     Compile your .proto file with protoc. For example (from CORENLP_HOME):
 *     <pre>
 *        protoc -I=src/edu/stanford/nlp/pipeline/:/path/to/folder/containing/your/proto/file --java_out=/path/to/output/src/folder/  /path/to/proto/file
 *     </pre>
 *   </li>
 *
 *   <li>
 *     <p>
 *     Extend {@link ProtobufAnnotationSerializer} to serialize and deserialize your field.
 *     Generally, this entails overriding two functions -- one to write the proto and one to read it.
 *     In both cases, you usually want to call the superclass' implementation of the function, and add on to it
 *     from there.
 *     In our running example, adding a field to the {@link CoreNLPProtos.Sentence} proto, you would override:
 *     </p>
 *
 *     <ul>
 *       <li>{@link ProtobufAnnotationSerializer#toProtoBuilder(edu.stanford.nlp.util.CoreMap, java.util.Set)}</li>
 *       <li>{@link ProtobufAnnotationSerializer#fromProtoNoTokens(edu.stanford.nlp.pipeline.CoreNLPProtos.Sentence)}</li>
 *     </ul>
 *
 *     <p>
 *     Note, importantly, that for the serializer to be able to check for lossless serialization, all annotations added
 *     to the proto must be registered as added by being removed from the set passed to
 *     {@link ProtobufAnnotationSerializer#toProtoBuilder(edu.stanford.nlp.util.CoreMap, java.util.Set)} (and the analogous
 *     functions for documents and tokens).
 *     </p>
 *
 *     <p>
 *       Lastly, the new annotations must be registered in the original .proto file; this can be achieved by including
 *       a static block in the overriding subclass:
 *     </p>
 *     <pre>
 *       static {
 *         ExtensionRegistry registry = ExtensionRegistry.newInstance();
 *         registry.add(MyAppProtos.myNewField);
 *         CoreNLPProtos.registerAllExtensions(registry);
 *       }
 *     </pre>
 *   </li>
 * </ol>
 *
 *
 * TODOs
 * <ul>
 *   <li>In CoreNLP, the leaves of a tree are == to the tokens in a sentence. This is not the case for a deserialized proto.</li>
 * </ul>
 *
 * @author Gabor Angeli
 */
public class ProtobufAnnotationSerializer extends AnnotationSerializer {

  /**
   * A global lock; necessary since dependency tree creation is not threadsafe.
   * A dedicated object is used instead of a String literal: literals are interned, so any
   * other code synchronizing on an equal literal would end up sharing this very monitor.
   */
  private static final Object globalLock = new Object();

  /**
   * Signals that serializing an annotation would drop information.
   * Raised while serializing, never while deserializing; it can only be created by the
   * enclosing serializer, as its constructor is private.
   *
   * @see ProtobufAnnotationSerializer#enforceLosslessSerialization
   * @see ProtobufAnnotationSerializer#ProtobufAnnotationSerializer(boolean)
   */
  public static class LossySerializationException extends RuntimeException {

    /**
     * @param msg A description of what could not be serialized.
     */
    private LossySerializationException(String msg) {
      super(msg);
    }

  }

  /**
   * If true, serialization is guaranteed to be lossless or else a runtime exception
   * ({@link LossySerializationException}) is thrown at serialization time.
   * The value is fixed at construction time.
   */
  public final boolean enforceLosslessSerialization;

  /**
   * Build a serializer that writes annotations in protocol buffer format and insists on
   * lossless output: serialization either preserves every key or fails with an exception.
   * Equivalent to {@code new ProtobufAnnotationSerializer(true)}.
   */
  public ProtobufAnnotationSerializer() {
    this(true);
  }

  /**
   * Build a serializer that writes annotations in protocol buffer format.
   *
   * @param enforceLosslessSerialization When true, a {@link ProtobufAnnotationSerializer.LossySerializationException}
   *                                     is raised at serialization time if some key would not be written.
   *                                     When false, no such exception is raised and keys the serializer
   *                                     does not handle are left out of the output.
   */
  public ProtobufAnnotationSerializer(boolean enforceLosslessSerialization) {
    this.enforceLosslessSerialization = enforceLosslessSerialization;
  }

  /** {@inheritDoc} */
  @Override
  public OutputStream write(Annotation corpus, OutputStream os) throws IOException {
    // Convert to a Document proto and emit it in delimited form, then push it out of any buffer
    toProto(corpus).writeDelimitedTo(os);
    os.flush();
    return os;
  }

  /** {@inheritDoc} */
  @Override
  public Pair<Annotation, InputStream> read(InputStream is) throws IOException, ClassNotFoundException, ClassCastException {
    // Consume exactly one delimited Document from the stream and hand the stream back to the caller
    final Annotation annotation = fromProto(CoreNLPProtos.Document.parseDelimitedFrom(is));
    return Pair.makePair(annotation, is);
  }

  /**
   * Read a single protocol buffer, which constitutes the entire file.
   * This is in contrast to the default, where multiple buffers may come out of the stream,
   * and therefore each one is prepended by the length of the buffer to follow.
   *
   * <p>
   *   The file is first parsed as one undelimited message. Only if that attempt fails is the file
   *   reopened and parsed as a delimited message. Each attempt uses its own stream, which is closed
   *   before this method returns.
   * </p>
   *
   * @param in The file to read.
   * @return A parsed Annotation.
   * @throws IOException In case the file cannot be read in either format. The failure of the
   *                     undelimited attempt is attached to the thrown exception as a suppressed exception.
   */
  @SuppressWarnings("UnusedDeclaration")
  public Annotation readUndelimited(File in) throws IOException {
    CoreNLPProtos.Document doc;
    try (InputStream undelimited = new FileInputStream(in)) {
      doc = CoreNLPProtos.Document.parseFrom(undelimited);
    } catch (Exception undelimitedFailure) {
      // Deliberately broad: any failure of the first attempt triggers the delimited fallback.
      // The fallback stream is opened lazily so that no second file handle is held in the common case.
      try (InputStream delimited = new FileInputStream(in)) {
        doc = CoreNLPProtos.Document.parseDelimitedFrom(delimited);
      } catch (IOException | RuntimeException delimitedFailure) {
        // Keep the original cause visible rather than silently discarding it
        delimitedFailure.addSuppressed(undelimitedFailure);
        throw delimitedFailure;
      }
    }
    return fromProto(doc);
  }

  /**
   * Look up a key in a CoreMap and mark that key as handled.
   * The key is first removed from {@code keysToRegister}; the value is then read from the map.
   *
   * @param map The CoreMap to retrieve the value from.
   * @param keysToRegister The set of keys which still have to be handled by the serializer;
   *                       {@code key} is removed from it.
   * @param key The key to retrieve.
   * @param <E> The class of the item which is being retrieved.
   * @return The result of {@code map.get(key)}.
   */
  private static <E> E getAndRegister(CoreMap map, Set<Class<?>> keysToRegister, Class<? extends CoreAnnotation<E>> key) {
    keysToRegister.remove(key);
    final E value = map.get(key);
    return value;
  }

  /**
   * Convert a CoreLabel into its Token proto.
   * This is an instance method because, depending on {@link #enforceLosslessSerialization},
   * it may reject a CoreLabel carrying keys that would not be written.
   *
   * @param coreLabel The CoreLabel to convert
   * @return A protocol buffer message corresponding to this CoreLabel
   * @throws LossySerializationException If lossless serialization is enforced and some non-null key
   *                                     of the CoreLabel was not registered as serialized.
   */
  public CoreNLPProtos.Token toProto(CoreLabel coreLabel) {
    final Set<Class<?>> pendingKeys = new HashSet<>(coreLabel.keySetNotNull());
    final CoreNLPProtos.Token.Builder tokenBuilder = toProtoBuilder(coreLabel, pendingKeys);
    // Anything still pending at this point was not written to the proto
    final boolean lossy = !pendingKeys.isEmpty();
    if (lossy && enforceLosslessSerialization) {
      throw new LossySerializationException("Keys are not being serialized: " + StringUtils.join(pendingKeys));
    }
    return tokenBuilder.build();
  }

  /**
   * <p>
   *   The extension point for subclasses which attach custom data to Tokens.
   *   Unlike {@link ProtobufAnnotationSerializer#toProto(edu.stanford.nlp.ling.CoreLabel)}, this function
   *   hands back the builder itself, so that further fields can be set before it is built.
   * </p>
   *
   * @param coreLabel The token to save to a protocol buffer
   * @param keysToSerialize A set tracking which keys still have to be saved. Every key written to the proto
   *                        must be removed from this set, as the set is what is used to check for lossless
   *                        serialization.
   * @return A Token builder populated from the non-null keys of the CoreLabel.
   */
  protected CoreNLPProtos.Token.Builder toProtoBuilder(CoreLabel coreLabel, Set<Class<?>> keysToSerialize) {
    final CoreNLPProtos.Token.Builder token = CoreNLPProtos.Token.newBuilder();
    final Set<Class<?>> present = coreLabel.keySetNotNull();
    // Keys which are serialized elsewhere are not required here
    keysToSerialize.remove(TextAnnotation.class);
    keysToSerialize.remove(SentenceIndexAnnotation.class);
    keysToSerialize.remove(DocIDAnnotation.class);
    keysToSerialize.remove(IndexAnnotation.class);
    keysToSerialize.remove(ParagraphAnnotation.class);
    // Keys populated by the number normalizer
    keysToSerialize.remove(NumericCompositeObjectAnnotation.class);
    keysToSerialize.remove(NumericCompositeTypeAnnotation.class);
    keysToSerialize.remove(NumericCompositeValueAnnotation.class);
    keysToSerialize.remove(NumericTypeAnnotation.class);
    keysToSerialize.remove(NumericValueAnnotation.class);
    // Keys which were never supposed to be there in the first place
    keysToSerialize.remove(ForcedSentenceUntilEndAnnotation.class);
    keysToSerialize.remove(ForcedSentenceEndAnnotation.class);
    keysToSerialize.remove(HeadWordLabelAnnotation.class);
    keysToSerialize.remove(HeadTagLabelAnnotation.class);
    // The word may be null if the CoreLabel is storing a character (as in the case of the segmenter)
    final String word = coreLabel.word();
    if (word != null) {
      token.setWord(word);
    }
    // Optional fields, read through the CoreLabel convenience accessors
    if (present.contains(PartOfSpeechAnnotation.class)) { token.setPos(coreLabel.tag()); keysToSerialize.remove(PartOfSpeechAnnotation.class); }
    if (present.contains(ValueAnnotation.class)) { token.setValue(coreLabel.value()); keysToSerialize.remove(ValueAnnotation.class); }
    if (present.contains(CategoryAnnotation.class)) { token.setCategory(coreLabel.category()); keysToSerialize.remove(CategoryAnnotation.class); }
    if (present.contains(BeforeAnnotation.class)) { token.setBefore(coreLabel.before()); keysToSerialize.remove(BeforeAnnotation.class); }
    if (present.contains(AfterAnnotation.class)) { token.setAfter(coreLabel.after()); keysToSerialize.remove(AfterAnnotation.class); }
    if (present.contains(OriginalTextAnnotation.class)) { token.setOriginalText(coreLabel.originalText()); keysToSerialize.remove(OriginalTextAnnotation.class); }
    if (present.contains(NamedEntityTagAnnotation.class)) { token.setNer(coreLabel.ner()); keysToSerialize.remove(NamedEntityTagAnnotation.class); }
    if (present.contains(CharacterOffsetBeginAnnotation.class)) { token.setBeginChar(coreLabel.beginPosition()); keysToSerialize.remove(CharacterOffsetBeginAnnotation.class); }
    if (present.contains(CharacterOffsetEndAnnotation.class)) { token.setEndChar(coreLabel.endPosition()); keysToSerialize.remove(CharacterOffsetEndAnnotation.class); }
    if (present.contains(LemmaAnnotation.class)) { token.setLemma(coreLabel.lemma()); keysToSerialize.remove(LemmaAnnotation.class); }
    // Optional fields, read by key; getAndRegister unregisters the key as it reads it
    if (present.contains(UtteranceAnnotation.class)) { token.setUtterance(getAndRegister(coreLabel, keysToSerialize, UtteranceAnnotation.class)); }
    if (present.contains(SpeakerAnnotation.class)) { token.setSpeaker(getAndRegister(coreLabel, keysToSerialize, SpeakerAnnotation.class)); }
    if (present.contains(BeginIndexAnnotation.class)) { token.setBeginIndex(getAndRegister(coreLabel, keysToSerialize, BeginIndexAnnotation.class)); }
    if (present.contains(EndIndexAnnotation.class)) { token.setEndIndex(getAndRegister(coreLabel, keysToSerialize, EndIndexAnnotation.class)); }
    if (present.contains(TokenBeginAnnotation.class)) { token.setTokenBeginIndex(getAndRegister(coreLabel, keysToSerialize, TokenBeginAnnotation.class)); }
    if (present.contains(TokenEndAnnotation.class)) { token.setTokenEndIndex(getAndRegister(coreLabel, keysToSerialize, TokenEndAnnotation.class)); }
    if (present.contains(NormalizedNamedEntityTagAnnotation.class)) { token.setNormalizedNER(getAndRegister(coreLabel, keysToSerialize, NormalizedNamedEntityTagAnnotation.class)); }
    if (present.contains(TimexAnnotation.class)) { token.setTimexValue(toProto(getAndRegister(coreLabel, keysToSerialize, TimexAnnotation.class))); }
    if (present.contains(AnswerAnnotation.class)) { token.setAnswer(getAndRegister(coreLabel, keysToSerialize, AnswerAnnotation.class)); }
    if (present.contains(WikipediaEntityAnnotation.class)) { token.setWikipediaEntity(getAndRegister(coreLabel, keysToSerialize, WikipediaEntityAnnotation.class)); }
    // The flag records whether an XML context was present at all, separately from its contents
    if (present.contains(XmlContextAnnotation.class)) {
      token.setHasXmlContext(true);
      token.addAllXmlContext(getAndRegister(coreLabel, keysToSerialize, XmlContextAnnotation.class));
    } else {
      token.setHasXmlContext(false);
    }
    if (present.contains(CorefClusterIdAnnotation.class)) { token.setCorefClusterID(getAndRegister(coreLabel, keysToSerialize, CorefClusterIdAnnotation.class)); }
    if (present.contains(NaturalLogicAnnotations.OperatorAnnotation.class)) { token.setOperator(toProto(getAndRegister(coreLabel, keysToSerialize, NaturalLogicAnnotations.OperatorAnnotation.class))); }
    if (present.contains(NaturalLogicAnnotations.PolarityAnnotation.class)) { token.setPolarity(toProto(getAndRegister(coreLabel, keysToSerialize, NaturalLogicAnnotations.PolarityAnnotation.class))); }
    if (present.contains(SpanAnnotation.class)) {
      IntPair tokenSpan = getAndRegister(coreLabel, keysToSerialize, SpanAnnotation.class);
      token.setSpan(CoreNLPProtos.Span.newBuilder().setBegin(tokenSpan.getSource()).setEnd(tokenSpan.getTarget()).build());
    }
    if (present.contains(SentimentCoreAnnotations.SentimentClass.class)) { token.setSentiment(getAndRegister(coreLabel, keysToSerialize, SentimentCoreAnnotations.SentimentClass.class)); }
    if (present.contains(QuotationIndexAnnotation.class)) { token.setQuotationIndex(getAndRegister(coreLabel, keysToSerialize, QuotationIndexAnnotation.class)); }
    if (present.contains(CoNLLUFeats.class)) { token.setConllUFeatures(toMapStringStringProto(getAndRegister(coreLabel, keysToSerialize, CoNLLUFeats.class))); }
    if (present.contains(CoNLLUTokenSpanAnnotation.class)) {
      IntPair conllSpan = getAndRegister(coreLabel, keysToSerialize, CoNLLUTokenSpanAnnotation.class);
      token.setConllUTokenSpan(CoreNLPProtos.Span.newBuilder().setBegin(conllSpan.getSource()).setEnd(conllSpan.getTarget()).build());
    }
    if (present.contains(CoNLLUMisc.class)) { token.setConllUMisc(getAndRegister(coreLabel, keysToSerialize, CoNLLUMisc.class)); }
    if (present.contains(CoarseTagAnnotation.class)) { token.setCoarseTag(getAndRegister(coreLabel, keysToSerialize, CoarseTagAnnotation.class)); }
    if (present.contains(CoNLLUSecondaryDepsAnnotation.class)) { token.setConllUSecondaryDeps(toMapIntStringProto(getAndRegister(coreLabel, keysToSerialize, CoNLLUSecondaryDepsAnnotation.class))); }

    // Non-default annotators
    if (present.contains(GenderAnnotation.class)) { token.setGender(getAndRegister(coreLabel, keysToSerialize, GenderAnnotation.class)); }
    if (present.contains(TrueCaseAnnotation.class)) { token.setTrueCase(getAndRegister(coreLabel, keysToSerialize, TrueCaseAnnotation.class)); }
    if (present.contains(TrueCaseTextAnnotation.class)) { token.setTrueCaseText(getAndRegister(coreLabel, keysToSerialize, TrueCaseTextAnnotation.class)); }

    // Chinese character related stuff
    if (present.contains(ChineseCharAnnotation.class)) { token.setChineseChar(getAndRegister(coreLabel, keysToSerialize, ChineseCharAnnotation.class)); }
    if (present.contains(ChineseSegAnnotation.class)) { token.setChineseSeg(getAndRegister(coreLabel, keysToSerialize, ChineseSegAnnotation.class)); }

    return token;
  }

  /**
   * Create a protobuf builder, rather than a compiled protobuf.
   * Useful for, e.g., the simple CoreNLP interface.
   * No lossless-serialization check is made on this path: the key-tracking set handed to
   * {@link #toProtoBuilder(CoreMap, Set)} starts out empty and is discarded afterwards.
   *
   * @param sentence The sentence to serialize.
   * @return A Sentence builder.
   */
  public CoreNLPProtos.Sentence.Builder toProtoBuilder(CoreMap sentence) {
    // A fresh, typed, mutable set: the two-argument overload (and any subclass override of it)
    // is documented to mutate the set, so a shared immutable raw singleton is not a safe argument.
    return toProtoBuilder(sentence, new HashSet<Class<?>>());
  }

  /**
   * Convert a CoreMap representing a sentence into its Sentence proto.
   * This is an instance method because, depending on {@link #enforceLosslessSerialization},
   * it may reject a sentence carrying keys that would not be written.
   *
   * @param sentence The CoreMap to convert. Note that it should not be a CoreLabel or an Annotation,
   *                 and should represent a sentence.
   * @return A protocol buffer message corresponding to this sentence
   * @throws IllegalArgumentException If the sentence is not a valid sentence (e.g., it is actually a CoreLabel).
   * @throws LossySerializationException If lossless serialization is enforced and some key of the
   *                                     sentence was not registered as serialized.
   */
  public CoreNLPProtos.Sentence toProto(CoreMap sentence) {
    final Set<Class<?>> pendingKeys = new HashSet<>(sentence.keySet());
    final CoreNLPProtos.Sentence.Builder sentenceBuilder = toProtoBuilder(sentence, pendingKeys);
    // Anything still pending at this point was not written to the proto
    final boolean lossy = !pendingKeys.isEmpty();
    if (lossy && enforceLosslessSerialization) {
      throw new LossySerializationException("Keys are not being serialized: " + StringUtils.join(pendingKeys));
    }
    return sentenceBuilder.build();
  }

  /**
   * <p>
   *   The method to extend by subclasses of the Protobuf Annotator if custom additions are added to Sentences.
   *   In contrast to {@link ProtobufAnnotationSerializer#toProto(edu.stanford.nlp.util.CoreMap)}, this function
   *   returns a builder that can be extended.
   * </p>
   *
   * <p>
   *   The token begin and token end offsets are read unconditionally; all other fields are only written
   *   if the corresponding key is present on the sentence.
   * </p>
   *
   * @param sentence The sentence to save to a protocol buffer
   * @param keysToSerialize A set tracking which keys have been saved. It's important to remove any keys added to the proto
   *                        from this set, as the code tracks annotations to ensure lossless serialization.
   * @return A Sentence builder populated from the sentence.
   * @throws IllegalArgumentException If the CoreMap is actually a CoreLabel.
   * @throws IllegalStateException If the sentence has relation mentions but no entity mentions.
   */
  @SuppressWarnings("deprecation")
  protected CoreNLPProtos.Sentence.Builder toProtoBuilder(CoreMap sentence, Set<Class<?>> keysToSerialize) {
    // Error checks
    if (sentence instanceof CoreLabel) { throw new IllegalArgumentException("CoreMap is actually a CoreLabel"); }
    CoreNLPProtos.Sentence.Builder builder = CoreNLPProtos.Sentence.newBuilder();
    // Remove items serialized elsewhere from the required list
    keysToSerialize.remove(TextAnnotation.class);
    keysToSerialize.remove(NumerizedTokensAnnotation.class);
    // Required fields
    builder.setTokenOffsetBegin(getAndRegister(sentence, keysToSerialize, TokenBeginAnnotation.class));
    builder.setTokenOffsetEnd(getAndRegister(sentence, keysToSerialize, TokenEndAnnotation.class));
    // Get key set of CoreMap
    Set<Class<?>> keySet;
    if (sentence instanceof ArrayCoreMap) {
      keySet = ((ArrayCoreMap) sentence).keySetNotNull();
    } else {
      keySet = new IdentityHashSet<>(sentence.keySet());
    }
    // Tokens
    if (sentence.containsKey(TokensAnnotation.class)) {
      for (CoreLabel tok : sentence.get(TokensAnnotation.class)) { builder.addToken(toProto(tok)); }
      keysToSerialize.remove(TokensAnnotation.class);
    }
    // Characters
    if (sentence.containsKey(SegmenterCoreAnnotations.CharactersAnnotation.class)) {
      for (CoreLabel c : sentence.get(SegmenterCoreAnnotations.CharactersAnnotation.class)) {
        builder.addCharacter(toProto(c));
      }
      keysToSerialize.remove(SegmenterCoreAnnotations.CharactersAnnotation.class);
    }
    // Optional fields
    if (keySet.contains(SentenceIndexAnnotation.class)) { builder.setSentenceIndex(getAndRegister(sentence, keysToSerialize, SentenceIndexAnnotation.class)); }
    if (keySet.contains(CharacterOffsetBeginAnnotation.class)) { builder.setCharacterOffsetBegin(getAndRegister(sentence, keysToSerialize, CharacterOffsetBeginAnnotation.class)); }
    if (keySet.contains(CharacterOffsetEndAnnotation.class)) { builder.setCharacterOffsetEnd(getAndRegister(sentence, keysToSerialize, CharacterOffsetEndAnnotation.class)); }
    if (keySet.contains(TreeAnnotation.class)) { builder.setParseTree(toProto(getAndRegister(sentence, keysToSerialize, TreeAnnotation.class))); }
    if (keySet.contains(BinarizedTreeAnnotation.class)) { builder.setBinarizedParseTree(toProto(getAndRegister(sentence, keysToSerialize, BinarizedTreeAnnotation.class))); }
    if (keySet.contains(KBestTreesAnnotation.class)) {
      for (Tree tree : sentence.get(KBestTreesAnnotation.class)) {
        builder.addKBestParseTrees(toProto(tree));
      }
      // Unregister outside the loop, so that an empty k-best list is not reported as a lossy key
      keysToSerialize.remove(KBestTreesAnnotation.class);
    }
    if (keySet.contains(SentimentCoreAnnotations.SentimentAnnotatedTree.class)) { builder.setAnnotatedParseTree(toProto(getAndRegister(sentence, keysToSerialize, SentimentCoreAnnotations.SentimentAnnotatedTree.class))); }
    if (keySet.contains(SentimentCoreAnnotations.SentimentClass.class)) { builder.setSentiment(getAndRegister(sentence, keysToSerialize, SentimentCoreAnnotations.SentimentClass.class)); }
    if (keySet.contains(BasicDependenciesAnnotation.class)) { builder.setBasicDependencies(toProto(getAndRegister(sentence, keysToSerialize, BasicDependenciesAnnotation.class))); }
    if (keySet.contains(CollapsedDependenciesAnnotation.class)) { builder.setCollapsedDependencies(toProto(getAndRegister(sentence, keysToSerialize, CollapsedDependenciesAnnotation.class))); }
    if (keySet.contains(CollapsedCCProcessedDependenciesAnnotation.class)) { builder.setCollapsedCCProcessedDependencies(toProto(getAndRegister(sentence, keysToSerialize, CollapsedCCProcessedDependenciesAnnotation.class))); }
    if (keySet.contains(AlternativeDependenciesAnnotation.class)) { builder.setAlternativeDependencies(toProto(getAndRegister(sentence, keysToSerialize, AlternativeDependenciesAnnotation.class))); }
    if (keySet.contains(EnhancedDependenciesAnnotation.class)) { builder.setEnhancedDependencies(toProto(getAndRegister(sentence, keysToSerialize, EnhancedDependenciesAnnotation.class))); }
    if (keySet.contains(EnhancedPlusPlusDependenciesAnnotation.class)) { builder.setEnhancedPlusPlusDependencies(toProto(getAndRegister(sentence, keysToSerialize, EnhancedPlusPlusDependenciesAnnotation.class))); }
    // The paragraph of the sentence is taken from its first token, if that token carries one
    if (keySet.contains(TokensAnnotation.class)) {
      List<CoreLabel> tokens = getAndRegister(sentence, keysToSerialize, TokensAnnotation.class);
      if (tokens.size() > 0 && tokens.get(0).containsKey(ParagraphAnnotation.class)) {
        builder.setParagraph(tokens.get(0).get(ParagraphAnnotation.class));
      }
    }
    // Only the presence of numerized tokens is recorded; the key itself was unregistered above
    builder.setHasNumerizedTokensAnnotation(keySet.contains(NumerizedTokensAnnotation.class));
    if (keySet.contains(NaturalLogicAnnotations.EntailedSentencesAnnotation.class)) {
      for (SentenceFragment entailedSentence : getAndRegister(sentence, keysToSerialize, NaturalLogicAnnotations.EntailedSentencesAnnotation.class)) {
        builder.addEntailedSentence(toProto(entailedSentence));
      }
    }
    if (keySet.contains(NaturalLogicAnnotations.EntailedClausesAnnotation.class)) {
      for (SentenceFragment entailedClause : getAndRegister(sentence, keysToSerialize, NaturalLogicAnnotations.EntailedClausesAnnotation.class)) {
        builder.addEntailedClause(toProto(entailedClause));
      }
    }
    if (keySet.contains(NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
      for (RelationTriple triple : getAndRegister(sentence, keysToSerialize, NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
        builder.addOpenieTriple(toProto(triple));
      }
    }
    if (keySet.contains(KBPTriplesAnnotation.class)) {
      for (RelationTriple triple : getAndRegister(sentence, keysToSerialize, KBPTriplesAnnotation.class)) {
        builder.addKbpTriple(toProto(triple));
      }
    }
    // Non-default annotators
    if (keySet.contains(EntityMentionsAnnotation.class)) {
      builder.setHasRelationAnnotations(true);
      for (EntityMention entity : getAndRegister(sentence, keysToSerialize, EntityMentionsAnnotation.class)) {
        builder.addEntity(toProto(entity));
      }
    } else {
      builder.setHasRelationAnnotations(false);
    }
    if (keySet.contains(RelationMentionsAnnotation.class)) {
      // The flag is only set when entity mentions were present, so this fires for relations without entities
      if (!builder.getHasRelationAnnotations()) { throw new IllegalStateException("Registered relation mentions without entity mentions"); }
      for (RelationMention relation : getAndRegister(sentence, keysToSerialize, RelationMentionsAnnotation.class)) {
        builder.addRelation(toProto(relation));
      }
    }
    // add each of the mentions in the List<Mentions> for this sentence
    if (keySet.contains(CorefMentionsAnnotation.class)) {
      builder.setHasCorefMentionsAnnotation(true);
      for (Mention m : sentence.get(CorefMentionsAnnotation.class)) {
        builder.addMentionsForCoref(toProto(m));
      }
      keysToSerialize.remove(CorefMentionsAnnotation.class);
    }
    // Entity mentions
    if (keySet.contains(MentionsAnnotation.class)) {
      for (CoreMap mention : sentence.get(MentionsAnnotation.class)) {
        builder.addMentions(toProtoMention(mention));
      }
      keysToSerialize.remove(MentionsAnnotation.class);
    }
    // add a sentence id if it exists
    if (keySet.contains(SentenceIDAnnotation.class)) { builder.setSentenceID(getAndRegister(sentence, keysToSerialize, SentenceIDAnnotation.class)); }

    // add section index
    if (keySet.contains(SectionIndexAnnotation.class)) { builder.setSectionIndex(getAndRegister(sentence, keysToSerialize, SectionIndexAnnotation.class)); }

    // add section date
    if (keySet.contains(SectionDateAnnotation.class)) { builder.setSectionDate(getAndRegister(sentence, keysToSerialize, SectionDateAnnotation.class)); }

    // Return
    return builder;
  }

  /**
   * Convert an Annotation representing a document into its Document proto.
   * This is an instance method because, depending on {@link #enforceLosslessSerialization},
   * it may reject a document carrying keys that would not be written.
   *
   * @param doc The Annotation to convert.
   * @return A protocol buffer message corresponding to this document
   * @throws LossySerializationException If lossless serialization is enforced and some key of the
   *                                     document was not registered as serialized.
   */
  public CoreNLPProtos.Document toProto(Annotation doc) {
    final Set<Class<?>> pendingKeys = new HashSet<>(doc.keySet());
    // note(gabor): tokens are saved in the sentence
    pendingKeys.remove(TokensAnnotation.class);
    final CoreNLPProtos.Document.Builder documentBuilder = toProtoBuilder(doc, pendingKeys);
    // Anything still pending at this point was not written to the proto
    final boolean lossy = !pendingKeys.isEmpty();
    if (lossy && enforceLosslessSerialization) {
      throw new LossySerializationException("Keys are not being serialized: " + StringUtils.join(pendingKeys));
    }
    return documentBuilder.build();
  }

  /**
   * Create a protobuf builder, rather than a compiled protobuf.
   * Useful for, e.g., the simple CoreNLP interface.
   * No lossless-serialization check is made on this path: the key-tracking set handed to
   * {@link #toProtoBuilder(Annotation, Set)} starts out empty and is discarded afterwards.
   *
   * @param doc The document to serialize.
   * @return A Document builder.
   */
  public CoreNLPProtos.Document.Builder toProtoBuilder(Annotation doc) {
    // A fresh, typed, mutable set: the two-argument overload (and any subclass override of it)
    // is documented to mutate the set, so a shared immutable raw singleton is not a safe argument.
    return toProtoBuilder(doc, new HashSet<Class<?>>());
  }

  /**
   * <p>
   *   The extension point for subclasses which attach custom data to Documents.
   *   Unlike {@link ProtobufAnnotationSerializer#toProto(edu.stanford.nlp.pipeline.Annotation)}, this function
   *   hands back the builder itself, so that further fields can be set before it is built.
   * </p>
   *
   * @param doc The document to save to a protocol buffer
   * @param keysToSerialize A set tracking which keys still have to be saved. Every key written to the proto
   *                        must be removed from this set, as the set is what is used to check for lossless
   *                        serialization.
   * @return A Document builder populated from the document.
   */
  protected CoreNLPProtos.Document.Builder toProtoBuilder(Annotation doc, Set<Class<?>> keysToSerialize) {
    final CoreNLPProtos.Document.Builder document = CoreNLPProtos.Document.newBuilder();
    // Required fields
    document.setText(doc.get(TextAnnotation.class));
    keysToSerialize.remove(TextAnnotation.class);
    // Sentences take precedence: document-level tokens are only written when there are no sentences
    if (doc.containsKey(SentencesAnnotation.class)) {
      for (CoreMap sent : doc.get(SentencesAnnotation.class)) {
        document.addSentence(toProto(sent));
      }
      keysToSerialize.remove(SentencesAnnotation.class);
    } else if (doc.containsKey(TokensAnnotation.class)) {
      // TokensAnnotation is not unregistered in this branch
      for (CoreLabel tok : doc.get(TokensAnnotation.class)) {
        document.addSentencelessToken(toProto(tok));
      }
    }
    // Document id
    if (doc.containsKey(DocIDAnnotation.class)) {
      document.setDocID(doc.get(DocIDAnnotation.class));
      keysToSerialize.remove(DocIDAnnotation.class);
    }
    // Document date
    if (doc.containsKey(DocDateAnnotation.class)) {
      document.setDocDate(doc.get(DocDateAnnotation.class));
      keysToSerialize.remove(DocDateAnnotation.class);
    }
    // Calendar, stored as milliseconds since the epoch
    if (doc.containsKey(CalendarAnnotation.class)) {
      final long epochMillis = doc.get(CalendarAnnotation.class).toInstant().toEpochMilli();
      document.setCalendar(epochMillis);
      keysToSerialize.remove(CalendarAnnotation.class);
    }
    // Coref chains; only the chains themselves are written, not the keys of the map
    if (doc.containsKey(CorefChainAnnotation.class)) {
      for (CorefChain corefChain : doc.get(CorefChainAnnotation.class).values()) {
        document.addCorefChain(toProto(corefChain));
      }
      keysToSerialize.remove(CorefChainAnnotation.class);
    }
    // Quotes
    if (doc.containsKey(QuotationsAnnotation.class)) {
      for (CoreMap quotation : doc.get(QuotationsAnnotation.class)) {
        document.addQuote(toProtoQuote(quotation));
      }
      keysToSerialize.remove(QuotationsAnnotation.class);
    }
    // Entity mentions
    if (doc.containsKey(MentionsAnnotation.class)) {
      for (CoreMap entityMention : doc.get(MentionsAnnotation.class)) {
        document.addMentions(toProtoMention(entityMention));
      }
      keysToSerialize.remove(MentionsAnnotation.class);
    }
    // Character info from the segmenter
    if (doc.containsKey(SegmenterCoreAnnotations.CharactersAnnotation.class)) {
      for (CoreLabel character : doc.get(SegmenterCoreAnnotations.CharactersAnnotation.class)) {
        document.addCharacter(toProto(character));
      }
      keysToSerialize.remove(SegmenterCoreAnnotations.CharactersAnnotation.class);
    }
    // Section info
    if (doc.containsKey(SectionsAnnotation.class)) {
      for (CoreMap sectionMap : doc.get(SectionsAnnotation.class)) {
        document.addSections(toProtoSection(sectionMap));
      }
      keysToSerialize.remove(SectionsAnnotation.class);
    }
    return document;
  }

  /**
   * Convert a {@link Tree} into its ParseTree protocol buffer, recursing over all children.
   * The span, label value, score and predicted sentiment class are written only when available;
   * a score of NaN is treated as "no score".
   *
   * @param parseTree The parse tree to convert.
   * @return A protocol buffer message corresponding to this tree.
   */
  public CoreNLPProtos.ParseTree toProto(Tree parseTree) {
    final CoreNLPProtos.ParseTree.Builder treeBuilder = CoreNLPProtos.ParseTree.newBuilder();

    // Children are serialized recursively, in order.
    final Tree[] kids = parseTree.children();
    for (int i = 0; i < kids.length; i++) {
      treeBuilder.addChild(toProto(kids[i]));
    }

    // Yield span, if the tree knows it.
    final IntPair yieldSpan = parseTree.getSpan();
    if (yieldSpan != null) {
      treeBuilder.setYieldBeginIndex(yieldSpan.getSource()).setYieldEndIndex(yieldSpan.getTarget());
    }

    // Node label.
    if (parseTree.label() != null) {
      treeBuilder.setValue(parseTree.label().value());
    }

    // Score, unless it is NaN.
    if (!Double.isNaN(parseTree.score())) {
      treeBuilder.setScore(parseTree.score());
    }

    // Predicted sentiment class, which is only reachable when the label is a CoreMap.
    if (parseTree.label() instanceof CoreMap) {
      final Integer predictedClass = ((CoreMap) parseTree.label()).get(RNNCoreAnnotations.PredictedClass.class);
      if (predictedClass != null) {
        treeBuilder.setSentiment(CoreNLPProtos.Sentiment.valueOf(predictedClass));
      }
    }

    return treeBuilder.build();
  }

  /**
   * Create a compact representation of the semantic graph for this dependency parse.
   * Every vertex is written as a node (sentence index, token index and, if positive, copy count);
   * every vertex whose index matches a root index is also registered as a root; every edge is written
   * with its endpoints, relation name, extra flag, endpoint copy counts and relation language.
   *
   * @param graph The dependency graph to save.
   * @return A protocol buffer message corresponding to this parse.
   */
  public static CoreNLPProtos.DependencyGraph toProto(SemanticGraph graph) {
    CoreNLPProtos.DependencyGraph.Builder builder = CoreNLPProtos.DependencyGraph.newBuilder();
    // Roots.
    // The indices are boxed Integers, so membership must be decided by value (HashSet), not by
    // reference: two boxes of the same int outside the Integer cache range (-128..127) are distinct
    // objects, and an identity-based set would fail to recognise roots at such indices.
    Set<Integer> rootSet = graph.getRoots().stream().map(IndexedWord::index).collect(Collectors.toCollection(HashSet::new));
    // Nodes
    for (IndexedWord node : graph.vertexSet()) {
      // Register node
      CoreNLPProtos.DependencyGraph.Node.Builder nodeBuilder = CoreNLPProtos.DependencyGraph.Node.newBuilder()
          .setSentenceIndex(node.get(SentenceIndexAnnotation.class))
          .setIndex(node.index());
      // The copy count is only stored for copy nodes (count > 0)
      if (node.copyCount() > 0) {
        nodeBuilder.setCopyAnnotation(node.copyCount());
      }
      builder.addNode(nodeBuilder.build());
      // Register root
      if (rootSet.contains(node.index())) {
        builder.addRoot(node.index());
      }
    }
    // Edges
    for (SemanticGraphEdge edge : graph.edgeIterable()) {
      // Set edge
      builder.addEdge(CoreNLPProtos.DependencyGraph.Edge.newBuilder()
          .setSource(edge.getSource().index())
          .setTarget(edge.getTarget().index())
          .setDep(edge.getRelation().toString())
          .setIsExtra(edge.isExtra())
          .setSourceCopy(edge.getSource().copyCount())
          .setTargetCopy(edge.getTarget().copyCount())
          .setLanguage(toProto(edge.getRelation().getLanguage())));
    }
    // Return
    return builder.build();
  }
  /**
   * Convert a coreference chain into its protocol buffer form.
   * Mentions are written in the iteration order of the chain's mention map; the representative
   * mention is stored as the position of that mention within the written list.
   * Begin, end, head and sentence indices are written shifted down by one.
   *
   * @param chain The coref chain to convert.
   * @return A protocol buffer message corresponding to this chain.
   */
  public CoreNLPProtos.CorefChain toProto(CorefChain chain) {
    final CoreNLPProtos.CorefChain.Builder chainBuilder = CoreNLPProtos.CorefChain.newBuilder();
    chainBuilder.setChainID(chain.getChainID());

    // Remember, by object identity, where each mention lands in the repeated field.
    final Map<CorefChain.CorefMention, Integer> positionOf = new IdentityHashMap<>();
    for (Set<CorefChain.CorefMention> mentionsAtSpan : chain.getMentionMap().values()) {
      for (CorefChain.CorefMention m : mentionsAtSpan) {
        positionOf.put(m, positionOf.size());
        final CoreNLPProtos.CorefChain.CorefMention.Builder mentionBuilder =
            CoreNLPProtos.CorefChain.CorefMention.newBuilder();
        mentionBuilder.setMentionID(m.mentionID);
        mentionBuilder.setMentionType(m.mentionType.name());
        mentionBuilder.setNumber(m.number.name());
        mentionBuilder.setGender(m.gender.name());
        mentionBuilder.setAnimacy(m.animacy.name());
        mentionBuilder.setBeginIndex(m.startIndex - 1);
        mentionBuilder.setEndIndex(m.endIndex - 1);
        mentionBuilder.setHeadIndex(m.headIndex - 1);
        mentionBuilder.setSentenceIndex(m.sentNum - 1);
        mentionBuilder.setPosition(m.position.get(1));
        chainBuilder.addMention(mentionBuilder);
      }
    }

    // The representative mention is referenced by its position in the list written above.
    chainBuilder.setRepresentative(positionOf.get(chain.getRepresentativeMention()));
    return chainBuilder.build();
  }

  /**
   * Create a Section protocol buffer from the given section CoreMap.
   * The character begin/end offsets are always written; author and date are written only when
   * present. The sentences of the section are stored as their sentence indexes.
   *
   * @param section The CoreMap describing the section.
   * @return A protocol buffer message corresponding to this section.
   */
  public CoreNLPProtos.Section toProtoSection(CoreMap section) {
    final CoreNLPProtos.Section.Builder sectionBuilder = CoreNLPProtos.Section.newBuilder()
        .setCharBegin(section.get(CharacterOffsetBeginAnnotation.class))
        .setCharEnd(section.get(CharacterOffsetEndAnnotation.class));

    // Optional author
    if (section.get(AuthorAnnotation.class) != null) {
      sectionBuilder.setAuthor(section.get(AuthorAnnotation.class));
    }
    // Optional date time
    if (section.get(SectionDateAnnotation.class) != null) {
      sectionBuilder.setDatetime(section.get(SectionDateAnnotation.class));
    }

    // Reference the section's sentences by index rather than embedding them
    for (CoreMap sent : section.get(SentencesAnnotation.class)) {
      final int idx = sent.get(SentenceIndexAnnotation.class);
      sectionBuilder.addSentenceIndexes(idx);
    }
    return sectionBuilder.build();
  }

  /**
   * Build an IndexedWord protocol buffer holding the position of the given {@link IndexedWord}.
   * A null word is encoded with a sentence number and token index of -1; otherwise the sentence
   * index and token index are written shifted down by one, together with the copy count.
   *
   * @param iw The indexed word to encode; may be null.
   * @return The position proto for this word.
   */
  public CoreNLPProtos.IndexedWord createIndexedWordProtoFromIW(IndexedWord iw) {
    final CoreNLPProtos.IndexedWord.Builder position = CoreNLPProtos.IndexedWord.newBuilder();
    if (iw == null) {
      // -1 / -1 marks "no word"
      return position.setSentenceNum(-1).setTokenIndex(-1).build();
    }
    return position
        .setSentenceNum(iw.get(SentenceIndexAnnotation.class) - 1)
        .setTokenIndex(iw.get(IndexAnnotation.class) - 1)
        .setCopyCount(iw.copyCount())
        .build();
  }

  /**
   * Build an IndexedWord protocol buffer holding the position of the given {@link CoreLabel}.
   * A null label is encoded with a sentence number and token index of -1; otherwise the sentence
   * index and token index are written shifted down by one. No copy count is written.
   *
   * @param cl The token to encode; may be null.
   * @return The position proto for this token.
   */
  public CoreNLPProtos.IndexedWord createIndexedWordProtoFromCL(CoreLabel cl) {
    final CoreNLPProtos.IndexedWord.Builder position = CoreNLPProtos.IndexedWord.newBuilder();
    if (cl == null) {
      // -1 / -1 marks "no token"
      return position.setSentenceNum(-1).setTokenIndex(-1).build();
    }
    return position
        .setSentenceNum(cl.get(SentenceIndexAnnotation.class) - 1)
        .setTokenIndex(cl.get(IndexAnnotation.class) - 1)
        .build();
  }

  /**
   * Convert a coref {@link Mention} into its protocol buffer form.
   * Enum-like attributes are stored by name, words are stored as (sentence, token) positions,
   * related mentions are stored by mention ID, and the dependency graphs / context parse tree
   * are only flagged as present or absent rather than embedded.
   *
   * @param mention The mention to convert.
   * @return A protocol buffer message corresponding to this mention.
   */
  public CoreNLPProtos.Mention toProto(Mention mention) {
    final CoreNLPProtos.Mention.Builder proto = CoreNLPProtos.Mention.newBuilder();

    // Enum-valued attributes, written by name when set
    if (mention.mentionType != null) {
      proto.setMentionType(mention.mentionType.name());
    }
    if (mention.gender != null) {
      proto.setGender(mention.gender.name());
    }
    if (mention.number != null) {
      proto.setNumber(mention.number.name());
    }
    if (mention.animacy != null) {
      proto.setAnimacy(mention.animacy.name());
    }
    if (mention.person != null) {
      proto.setPerson(mention.person.name());
    }

    // Optional strings
    if (mention.headString != null) {
      proto.setHeadString(mention.headString);
    }
    if (mention.nerString != null) {
      proto.setNerString(mention.nerString);
    }

    // Fields that are always written
    proto.setStartIndex(mention.startIndex)
        .setEndIndex(mention.endIndex)
        .setHeadIndex(mention.headIndex)
        .setMentionID(mention.mentionID)
        .setOriginalRef(mention.originalRef)
        .setGoldCorefClusterID(mention.goldCorefClusterID)
        .setCorefClusterID(mention.corefClusterID)
        .setMentionNum(mention.mentionNum)
        .setSentNum(mention.sentNum)
        .setUtter(mention.utter)
        .setParagraph(mention.paragraph)
        .setIsSubject(mention.isSubject)
        .setIsDirectObject(mention.isDirectObject)
        .setIsIndirectObject(mention.isIndirectObject)
        .setIsPrepositionObject(mention.isPrepositionObject)
        .setHasTwin(mention.hasTwin)
        .setGeneric(mention.generic)
        .setIsSingleton(mention.isSingleton);

    // The two String collections
    if (mention.dependents != null) {
      for (String dependent : mention.dependents) {
        proto.addDependents(dependent);
      }
    }
    if (mention.preprocessedTerms != null) {
      for (String term : mention.preprocessedTerms) {
        proto.addPreprocessedTerms(term);
      }
    }

    // Individual words, stored as (sentence number, token index) positions
    proto.setDependingVerb(createIndexedWordProtoFromIW(mention.dependingVerb));
    proto.setHeadIndexedWord(createIndexedWordProtoFromIW(mention.headIndexedWord));
    proto.setHeadWord(createIndexedWordProtoFromCL(mention.headWord));

    // Positions for each CoreLabel of the sentence and of the original span
    if (mention.sentenceWords != null) {
      for (CoreLabel sentenceWord : mention.sentenceWords) {
        proto.addSentenceWords(createIndexedWordProtoFromCL(sentenceWord));
      }
    }
    if (mention.originalSpan != null) {
      for (CoreLabel spanWord : mention.originalSpan) {
        proto.addOriginalSpan(createIndexedWordProtoFromCL(spanWord));
      }
    }

    // Only record whether the dependency graphs and context parse tree exist
    proto.setHasBasicDependency(mention.basicDependency != null);
    proto.setHasEnhancedDepenedncy(mention.enhancedDependency != null);
    proto.setHasContextParseTree(mention.contextParseTree != null);

    // Related mentions are referenced by mention ID only
    if (mention.appositions != null) {
      for (Mention apposition : mention.appositions) {
        proto.addAppositions(apposition.mentionID);
      }
    }
    if (mention.predicateNominatives != null) {
      for (Mention nominative : mention.predicateNominatives) {
        proto.addPredicateNominatives(nominative.mentionID);
      }
    }
    if (mention.relativePronouns != null) {
      for (Mention pronoun : mention.relativePronouns) {
        proto.addRelativePronouns(pronoun.mentionID);
      }
    }
    if (mention.listMembers != null) {
      for (Mention member : mention.listMembers) {
        proto.addListMembers(member.mentionID);
      }
    }
    if (mention.belongToLists != null) {
      for (Mention owner : mention.belongToLists) {
        proto.addBelongToLists(owner.mentionID);
      }
    }

    // Speaker info, when known
    if (mention.speakerInfo != null) {
      proto.setSpeakerInfo(toProto(mention.speakerInfo));
    }

    return proto.build();
  }

  /**
   * Convert a {@link SpeakerInfo} into its protocol buffer form: the speaker name plus the
   * mention IDs of the speaker's mentions.
   *
   * @param speakerInfo The speaker info to convert.
   * @return A protocol buffer message corresponding to this speaker info.
   */
  public CoreNLPProtos.SpeakerInfo toProto(SpeakerInfo speakerInfo) {
    final CoreNLPProtos.SpeakerInfo.Builder speaker =
        CoreNLPProtos.SpeakerInfo.newBuilder().setSpeakerName(speakerInfo.getSpeakerName());
    // Mentions are referenced by ID; the IDs are expected to have been set by MentionAnnotator
    for (Mention speakerMention : speakerInfo.getMentions()) {
      speaker.addMentions(speakerMention.mentionID);
    }
    return speaker.build();
  }


  /**
   * Convert the given Timex object to a protocol buffer.
   * String attributes are written only when non-null; the begin and end points are written
   * only when they are non-negative.
   *
   * @param timex The timex to convert.
   * @return A protocol buffer corresponding to this Timex object.
   */
  public CoreNLPProtos.Timex toProto(Timex timex) {
    final CoreNLPProtos.Timex.Builder timexBuilder = CoreNLPProtos.Timex.newBuilder();
    if (timex.value() != null) {
      timexBuilder.setValue(timex.value());
    }
    if (timex.altVal() != null) {
      timexBuilder.setAltValue(timex.altVal());
    }
    if (timex.text() != null) {
      timexBuilder.setText(timex.text());
    }
    if (timex.timexType() != null) {
      timexBuilder.setType(timex.timexType());
    }
    if (timex.tid() != null) {
      timexBuilder.setTid(timex.tid());
    }
    // Negative points are treated as "not set"
    if (timex.beginPoint() >= 0) {
      timexBuilder.setBeginPoint(timex.beginPoint());
    }
    if (timex.endPoint() >= 0) {
      timexBuilder.setEndPoint(timex.endPoint());
    }
    return timexBuilder.build();
  }

  /**
   * Serialize the given entity mention to the corresponding protocol buffer.
   * Every attribute is optional: object-valued attributes are written when non-null, and the
   * syntactic head token position is written when it is non-negative.
   *
   * @param ent The entity mention to serialize.
   * @return A protocol buffer corresponding to the serialized entity mention.
   */
  public CoreNLPProtos.Entity toProto(EntityMention ent) {
    final CoreNLPProtos.Entity.Builder entity = CoreNLPProtos.Entity.newBuilder();

    // Attributes inherited from ExtractionObject
    if (ent.getObjectId() != null) {
      entity.setObjectID(ent.getObjectId());
    }
    if (ent.getExtent() != null) {
      entity.setExtentStart(ent.getExtent().start());
      entity.setExtentEnd(ent.getExtent().end());
    }
    if (ent.getType() != null) {
      entity.setType(ent.getType());
    }
    if (ent.getSubType() != null) {
      entity.setSubtype(ent.getSubType());
    }

    // Attributes specific to entity mentions
    if (ent.getHead() != null) {
      entity.setHeadStart(ent.getHead().start()).setHeadEnd(ent.getHead().end());
    }
    if (ent.getMentionType() != null) {
      entity.setMentionType(ent.getMentionType());
    }
    if (ent.getNormalizedName() != null) {
      entity.setNormalizedName(ent.getNormalizedName());
    }
    if (ent.getSyntacticHeadTokenPosition() >= 0) {
      entity.setHeadTokenIndex(ent.getSyntacticHeadTokenPosition());
    }
    if (ent.getCorefID() != null) {
      entity.setCorefID(ent.getCorefID());
    }

    return entity.build();
  }

  /**
   * Serialize the given relation mention to the corresponding protocol buffer.
   * Every attribute is written only when non-null. Each argument is cast to
   * {@link EntityMention} and serialized as an entity.
   *
   * @param rel The relation mention to serialize.
   * @return A protocol buffer corresponding to the serialized relation mention.
   */
  public CoreNLPProtos.Relation toProto(RelationMention rel) {
    final CoreNLPProtos.Relation.Builder relation = CoreNLPProtos.Relation.newBuilder();

    // Attributes inherited from ExtractionObject
    if (rel.getObjectId() != null) {
      relation.setObjectID(rel.getObjectId());
    }
    if (rel.getExtent() != null) {
      relation.setExtentStart(rel.getExtent().start());
      relation.setExtentEnd(rel.getExtent().end());
    }
    if (rel.getType() != null) {
      relation.setType(rel.getType());
    }
    if (rel.getSubType() != null) {
      relation.setSubtype(rel.getSubType());
    }

    // Attributes specific to relation mentions
    if (rel.getArgNames() != null) {
      for (String argName : rel.getArgNames()) {
        relation.addArgName(argName);
      }
    }
    if (rel.getArgs() != null) {
      for (ExtractionObject argument : rel.getArgs()) {
        relation.addArg(toProto((EntityMention) argument));
      }
    }

    return relation.build();
  }

  /**
   * Map a CoreNLP {@link Language} onto the Protobuf enum constant of the same name.
   *
   * @param lang The language to serialize.
   * @return The language in a Protobuf enum.
   * @throws IllegalStateException If the language has no Protobuf counterpart listed here.
   */
  public static CoreNLPProtos.Language toProto(Language lang) {
    final CoreNLPProtos.Language converted;
    switch (lang) {
      case Arabic:
        converted = CoreNLPProtos.Language.Arabic;
        break;
      case Chinese:
        converted = CoreNLPProtos.Language.Chinese;
        break;
      case UniversalChinese:
        converted = CoreNLPProtos.Language.UniversalChinese;
        break;
      case English:
        converted = CoreNLPProtos.Language.English;
        break;
      case UniversalEnglish:
        converted = CoreNLPProtos.Language.UniversalEnglish;
        break;
      case German:
        converted = CoreNLPProtos.Language.German;
        break;
      case French:
        converted = CoreNLPProtos.Language.French;
        break;
      case Hebrew:
        converted = CoreNLPProtos.Language.Hebrew;
        break;
      case Spanish:
        converted = CoreNLPProtos.Language.Spanish;
        break;
      case Unknown:
        converted = CoreNLPProtos.Language.Unknown;
        break;
      case Any:
        converted = CoreNLPProtos.Language.Any;
        break;
      default:
        throw new IllegalStateException("Unknown language: " + lang);
    }
    return converted;
  }

  /**
   * Return a Protobuf operator from an OperatorSpec (Natural Logic).
   * The operator is stored by the name of its instance, along with the begin/end offsets of
   * its quantifier, subject and object spans.
   *
   * @param op The operator specification to serialize.
   * @return The Protobuf operator.
   */
  public static CoreNLPProtos.Operator toProto(OperatorSpec op) {
    final CoreNLPProtos.Operator.Builder operator = CoreNLPProtos.Operator.newBuilder();
    operator.setName(op.instance.name());
    // Quantifier span
    operator.setQuantifierSpanBegin(op.quantifierBegin);
    operator.setQuantifierSpanEnd(op.quantifierEnd);
    // Subject span
    operator.setSubjectSpanBegin(op.subjectBegin);
    operator.setSubjectSpanEnd(op.subjectEnd);
    // Object span
    operator.setObjectSpanBegin(op.objectBegin);
    operator.setObjectSpanEnd(op.objectEnd);
    return operator.build();
  }

  /**
   * Return a Protobuf polarity from a CoreNLP Polarity (Natural Logic).
   * The polarity is stored as a table: for each of the seven natural logic relations, the
   * relation it is projected to (looked up in the Protobuf enum by its fixed index).
   *
   * @param pol The polarity to serialize.
   * @return The Protobuf polarity.
   */
  public static CoreNLPProtos.Polarity toProto(Polarity pol) {
    final CoreNLPProtos.Polarity.Builder polarity = CoreNLPProtos.Polarity.newBuilder();
    polarity.setProjectEquivalence(CoreNLPProtos.NaturalLogicRelation.valueOf(
        pol.projectLexicalRelation(NaturalLogicRelation.EQUIVALENT).fixedIndex));
    polarity.setProjectForwardEntailment(CoreNLPProtos.NaturalLogicRelation.valueOf(
        pol.projectLexicalRelation(NaturalLogicRelation.FORWARD_ENTAILMENT).fixedIndex));
    polarity.setProjectReverseEntailment(CoreNLPProtos.NaturalLogicRelation.valueOf(
        pol.projectLexicalRelation(NaturalLogicRelation.REVERSE_ENTAILMENT).fixedIndex));
    polarity.setProjectNegation(CoreNLPProtos.NaturalLogicRelation.valueOf(
        pol.projectLexicalRelation(NaturalLogicRelation.NEGATION).fixedIndex));
    polarity.setProjectAlternation(CoreNLPProtos.NaturalLogicRelation.valueOf(
        pol.projectLexicalRelation(NaturalLogicRelation.ALTERNATION).fixedIndex));
    polarity.setProjectCover(CoreNLPProtos.NaturalLogicRelation.valueOf(
        pol.projectLexicalRelation(NaturalLogicRelation.COVER).fixedIndex));
    polarity.setProjectIndependence(CoreNLPProtos.NaturalLogicRelation.valueOf(
        pol.projectLexicalRelation(NaturalLogicRelation.INDEPENDENCE).fixedIndex));
    return polarity.build();
  }

  /**
   * Return a Protobuf SentenceFragment from a SentenceFragment.
   * Token indices and the root index are written shifted down by one.
   *
   * @param fragment The sentence fragment to serialize.
   * @return The Protobuf sentence fragment.
   */
  public static CoreNLPProtos.SentenceFragment toProto(SentenceFragment fragment) {
    final CoreNLPProtos.SentenceFragment.Builder fragmentBuilder = CoreNLPProtos.SentenceFragment.newBuilder();
    fragmentBuilder.setAssumedTruth(fragment.assumedTruth);
    fragmentBuilder.setScore(fragment.score);
    // Indices of the fragment's words, minus one
    final List<Integer> tokenIndices = fragment.words.stream()
        .map(word -> word.index() - 1)
        .collect(Collectors.toList());
    fragmentBuilder.addAllTokenIndex(tokenIndices);
    // Index of the first root of the fragment's parse tree, minus one
    fragmentBuilder.setRoot(fragment.parseTree.getFirstRoot().index() - 1);
    return fragmentBuilder.build();
  }


  /**
   * Return a Protobuf RelationTriple from a RelationTriple.
   * The glosses and confidence are always written. The subject, relation and object tokens are
   * stored as (sentence index, token index - 1) locations; a single relation token without an
   * IndexAnnotation is treated as a placeholder and yields no relation locations. The dependency
   * tree is written only when the triple provides one.
   *
   * @param triple The relation triple to serialize.
   * @return The Protobuf relation triple.
   */
  public static CoreNLPProtos.RelationTriple toProto(RelationTriple triple) {
    final CoreNLPProtos.RelationTriple.Builder tripleBuilder = CoreNLPProtos.RelationTriple.newBuilder();
    tripleBuilder.setSubject(triple.subjectGloss());
    tripleBuilder.setRelation(triple.relationGloss());
    tripleBuilder.setObject(triple.objectGloss());
    tripleBuilder.setConfidence(triple.confidence);

    // Subject tokens
    final List<CoreNLPProtos.TokenLocation> subjectLocations = triple.subject.stream()
        .map(tok -> CoreNLPProtos.TokenLocation.newBuilder()
            .setSentenceIndex(tok.sentIndex())
            .setTokenIndex(tok.index() - 1)
            .build())
        .collect(Collectors.toList());
    tripleBuilder.addAllSubjectTokens(subjectLocations);

    // Relation tokens
    final List<CoreNLPProtos.TokenLocation> relationLocations;
    if (triple.relation.size() == 1 && triple.relation.get(0).get(IndexAnnotation.class) == null) {
      // case: this is not a real relation token, but rather a placeholder relation
      relationLocations = Collections.emptyList();
    } else {
      relationLocations = triple.relation.stream()
          .map(tok -> CoreNLPProtos.TokenLocation.newBuilder()
              .setSentenceIndex(tok.sentIndex())
              .setTokenIndex(tok.index() - 1)
              .build())
          .collect(Collectors.toList());
    }
    tripleBuilder.addAllRelationTokens(relationLocations);

    // Object tokens
    final List<CoreNLPProtos.TokenLocation> objectLocations = triple.object.stream()
        .map(tok -> CoreNLPProtos.TokenLocation.newBuilder()
            .setSentenceIndex(tok.sentIndex())
            .setTokenIndex(tok.index() - 1)
            .build())
        .collect(Collectors.toList());
    tripleBuilder.addAllObjectTokens(objectLocations);

    // Dependency tree, if the triple has one
    triple.asDependencyTree().ifPresent(tree -> tripleBuilder.setTree(toProto(tree)));
    return tripleBuilder.build();
  }

  /**
   * Serialize a Map (from Strings to Strings) to a proto.
   * Keys and values are written as two parallel lists, in the map's iteration order.
   *
   * @param map The map to serialize.
   *
   * @return A proto representation of the map.
   */
  public static CoreNLPProtos.MapStringString toMapStringStringProto(Map<String,String> map) {
    final CoreNLPProtos.MapStringString.Builder mapBuilder = CoreNLPProtos.MapStringString.newBuilder();
    map.forEach((key, value) -> {
      mapBuilder.addKey(key);
      mapBuilder.addValue(value);
    });
    return mapBuilder.build();
  }

  /**
   * Serialize a Map (from Integers to Strings) to a proto.
   * Keys and values are written as two parallel lists, in the map's iteration order.
   *
   * @param map The map to serialize.
   *
   * @return A proto representation of the map.
   */
  public static CoreNLPProtos.MapIntString toMapIntStringProto(Map<Integer,String> map) {
    final CoreNLPProtos.MapIntString.Builder mapBuilder = CoreNLPProtos.MapIntString.newBuilder();
    map.forEach((key, value) -> {
      mapBuilder.addKey(key);
      mapBuilder.addValue(value);
    });
    return mapBuilder.build();
  }


  /**
   * Convert a quote object to a protocol buffer.
   * Each annotation is copied only when the quote has a non-null value for it.
   *
   * @param quote The CoreMap describing the quote.
   * @return The Protobuf quote.
   */
  public static CoreNLPProtos.Quote toProtoQuote(CoreMap quote) {
    final CoreNLPProtos.Quote.Builder quoteBuilder = CoreNLPProtos.Quote.newBuilder();

    final String text = quote.get(TextAnnotation.class);
    if (text != null) {
      quoteBuilder.setText(text);
    }
    final String docId = quote.get(DocIDAnnotation.class);
    if (docId != null) {
      quoteBuilder.setDocid(docId);
    }
    // Character offsets
    final Integer charBegin = quote.get(CharacterOffsetBeginAnnotation.class);
    if (charBegin != null) {
      quoteBuilder.setBegin(charBegin);
    }
    final Integer charEnd = quote.get(CharacterOffsetEndAnnotation.class);
    if (charEnd != null) {
      quoteBuilder.setEnd(charEnd);
    }
    // Sentence offsets
    final Integer sentenceBegin = quote.get(SentenceBeginAnnotation.class);
    if (sentenceBegin != null) {
      quoteBuilder.setSentenceBegin(sentenceBegin);
    }
    final Integer sentenceEnd = quote.get(SentenceEndAnnotation.class);
    if (sentenceEnd != null) {
      quoteBuilder.setSentenceEnd(sentenceEnd);
    }
    // Token offsets
    final Integer tokenBegin = quote.get(TokenBeginAnnotation.class);
    if (tokenBegin != null) {
      quoteBuilder.setTokenBegin(tokenBegin);
    }
    final Integer tokenEnd = quote.get(TokenEndAnnotation.class);
    if (tokenEnd != null) {
      quoteBuilder.setTokenEnd(tokenEnd);
    }
    final Integer quotationIndex = quote.get(QuotationIndexAnnotation.class);
    if (quotationIndex != null) {
      quoteBuilder.setIndex(quotationIndex);
    }

    return quoteBuilder.build();
  }

  /**
   * Convert a mention object to a protocol buffer.
   * Each annotation is copied only when the mention has a non-null value for it.
   *
   * @param mention The CoreMap describing the entity mention.
   * @return The Protobuf NER mention.
   */
  public CoreNLPProtos.NERMention toProtoMention(CoreMap mention) {
    final CoreNLPProtos.NERMention.Builder mentionBuilder = CoreNLPProtos.NERMention.newBuilder();

    // Location of the mention
    final Integer sentenceIndex = mention.get(SentenceIndexAnnotation.class);
    if (sentenceIndex != null) {
      mentionBuilder.setSentenceIndex(sentenceIndex);
    }
    final Integer tokenBegin = mention.get(TokenBeginAnnotation.class);
    if (tokenBegin != null) {
      mentionBuilder.setTokenStartInSentenceInclusive(tokenBegin);
    }
    final Integer tokenEnd = mention.get(TokenEndAnnotation.class);
    if (tokenEnd != null) {
      mentionBuilder.setTokenEndInSentenceExclusive(tokenEnd);
    }

    // Entity information
    final String ner = mention.get(NamedEntityTagAnnotation.class);
    if (ner != null) {
      mentionBuilder.setNer(ner);
    }
    final String normalizedNer = mention.get(NormalizedNamedEntityTagAnnotation.class);
    if (normalizedNer != null) {
      mentionBuilder.setNormalizedNER(normalizedNer);
    }
    final String entityType = mention.get(EntityTypeAnnotation.class);
    if (entityType != null) {
      mentionBuilder.setEntityType(entityType);
    }
    final Timex timex = mention.get(TimexAnnotation.class);
    if (timex != null) {
      mentionBuilder.setTimex(toProto(timex));
    }
    final String wikipediaEntity = mention.get(WikipediaEntityAnnotation.class);
    if (wikipediaEntity != null) {
      mentionBuilder.setWikipediaEntity(wikipediaEntity);
    }

    return mentionBuilder.build();
  }

  /**
   * Create a CoreLabel from its serialized counterpart.
   * Note that this is, by itself, a lossy operation. Fields like the docid (sentence index, etc.) are only known
   * from the enclosing document, and are not tracked in the protobuf.
   * The word is always set; every other field is copied only if it is present in the proto.
   *
   * @param proto The serialized protobuf to read the CoreLabel from.
   * @return A CoreLabel, missing the fields that are not stored in the CoreLabel protobuf.
   * @throws RuntimeInterruptedException If the current thread has been interrupted.
   */
  public CoreLabel fromProto(CoreNLPProtos.Token proto) {
    // Thread.interrupted() also clears the interrupt flag before the exception is raised
    if (Thread.interrupted()) {
      throw new RuntimeInterruptedException();
    }
    CoreLabel word = new CoreLabel();
    // Required fields
    word.setWord(proto.getWord());
    // Optional fields
    if (proto.hasPos()) { word.setTag(proto.getPos()); }
    if (proto.hasValue()) { word.setValue(proto.getValue()); }
    if (proto.hasCategory()) { word.setCategory(proto.getCategory()); }
    if (proto.hasBefore()) { word.setBefore(proto.getBefore()); }
    if (proto.hasAfter()) { word.setAfter(proto.getAfter()); }
    if (proto.hasOriginalText()) { word.setOriginalText(proto.getOriginalText()); }
    if (proto.hasNer()) { word.setNER(proto.getNer()); }
    if (proto.hasLemma()) { word.setLemma(proto.getLemma()); }
    if (proto.hasBeginChar()) { word.setBeginPosition(proto.getBeginChar()); }
    if (proto.hasEndChar()) { word.setEndPosition(proto.getEndChar()); }
    if (proto.hasSpeaker()) { word.set(SpeakerAnnotation.class, proto.getSpeaker()); }
    if (proto.hasUtterance()) { word.set(UtteranceAnnotation.class, proto.getUtterance()); }
    if (proto.hasBeginIndex()) { word.set(BeginIndexAnnotation.class, proto.getBeginIndex()); }
    if (proto.hasEndIndex()) { word.set(EndIndexAnnotation.class, proto.getEndIndex()); }
    if (proto.hasTokenBeginIndex()) { word.set(TokenBeginAnnotation.class, proto.getTokenBeginIndex()); }
    if (proto.hasTokenEndIndex()) { word.set(TokenEndAnnotation.class, proto.getTokenEndIndex()); }
    if (proto.hasNormalizedNER()) { word.set(NormalizedNamedEntityTagAnnotation.class, proto.getNormalizedNER()); }
    if (proto.hasTimexValue()) { word.set(TimexAnnotation.class, fromProto(proto.getTimexValue())); }
    // The XML context list is only restored when the token was flagged as having one
    if (proto.hasHasXmlContext() && proto.getHasXmlContext()) { word.set(XmlContextAnnotation.class, proto.getXmlContextList()); }
    if (proto.hasCorefClusterID()) { word.set(CorefClusterIdAnnotation.class, proto.getCorefClusterID()); }
    if (proto.hasAnswer()) { word.set(AnswerAnnotation.class, proto.getAnswer()); }
    if (proto.hasOperator()) { word.set(NaturalLogicAnnotations.OperatorAnnotation.class, fromProto(proto.getOperator())); }
    if (proto.hasPolarity()) { word.set(NaturalLogicAnnotations.PolarityAnnotation.class, fromProto(proto.getPolarity())); }
    if (proto.hasSpan()) { word.set(SpanAnnotation.class, new IntPair(proto.getSpan().getBegin(), proto.getSpan().getEnd())); }
    if (proto.hasSentiment()) { word.set(SentimentCoreAnnotations.SentimentClass.class, proto.getSentiment()); }
    if (proto.hasQuotationIndex()) { word.set(QuotationIndexAnnotation.class, proto.getQuotationIndex()); }
    if (proto.hasConllUFeatures()) { word.set(CoNLLUFeats.class, fromProto(proto.getConllUFeatures())); }
    if (proto.hasConllUMisc()) { word.set(CoNLLUMisc.class, proto.getConllUMisc()); }
    if (proto.hasCoarseTag()) { word.set(CoarseTagAnnotation.class, proto.getCoarseTag()); }
    // Both ends of the pair come from the CoNLL-U token span itself, not from the generic span field
    if (proto.hasConllUTokenSpan()) { word.set(CoNLLUTokenSpanAnnotation.class, new IntPair(proto.getConllUTokenSpan().getBegin(), proto.getConllUTokenSpan().getEnd())); }
    if (proto.hasConllUSecondaryDeps()) { word.set(CoNLLUSecondaryDepsAnnotation.class, fromProto(proto.getConllUSecondaryDeps())); }
    if (proto.hasWikipediaEntity()) { word.set(WikipediaEntityAnnotation.class, proto.getWikipediaEntity()); }
    // Chinese char info
    if (proto.hasChineseChar()) { word.set(ChineseCharAnnotation.class, proto.getChineseChar()) ; }
    if (proto.hasChineseSeg()) { word.set(ChineseSegAnnotation.class, proto.getChineseSeg()) ; }

    // Non-default annotators
    if (proto.hasGender()) { word.set(GenderAnnotation.class, proto.getGender()); }
    if (proto.hasTrueCase()) { word.set(TrueCaseAnnotation.class, proto.getTrueCase()); }
    if (proto.hasTrueCaseText()) { word.set(TrueCaseTextAnnotation.class, proto.getTrueCaseText()); }

    // Return
    return word;
  }

  /**
   * Create a CoreMap representing a sentence from this protocol buffer.
   * This should not be used if you are reading a whole document, as it populates the tokens independent of the
   * document tokens, which is not the behavior an {@link edu.stanford.nlp.pipeline.Annotation} expects.
   * Sentences carrying OpenIE or KBP triples are rejected by this method.
   *
   * @param proto The protocol buffer to read from.
   * @return A CoreMap representing the sentence.
   * @throws RuntimeInterruptedException If the current thread has been interrupted.
   * @throws IllegalStateException If the proto contains OpenIE or KBP triples.
   */
  @SuppressWarnings("deprecation")
  @Deprecated
  public CoreMap fromProto(CoreNLPProtos.Sentence proto) {
    if (Thread.interrupted()) {
      throw new RuntimeInterruptedException();
    }
    final CoreMap sentence = fromProtoNoTokens(proto);

    // Tokens are absent from the token-less sentence, so read them from the proto itself
    final List<CoreLabel> tokens = proto.getTokenList().stream()
        .map(this::fromProto)
        .collect(Collectors.toList());
    sentence.set(TokensAnnotation.class, tokens);

    // Dependency graphs, each rebuilt over the tokens read above
    if (proto.hasBasicDependencies()) {
      sentence.set(BasicDependenciesAnnotation.class,
          fromProto(proto.getBasicDependencies(), tokens, null));
    }
    if (proto.hasCollapsedDependencies()) {
      sentence.set(CollapsedDependenciesAnnotation.class,
          fromProto(proto.getCollapsedDependencies(), tokens, null));
    }
    if (proto.hasCollapsedCCProcessedDependencies()) {
      sentence.set(CollapsedCCProcessedDependenciesAnnotation.class,
          fromProto(proto.getCollapsedCCProcessedDependencies(), tokens, null));
    }
    if (proto.hasAlternativeDependencies()) {
      sentence.set(AlternativeDependenciesAnnotation.class,
          fromProto(proto.getAlternativeDependencies(), tokens, null));
    }
    if (proto.hasEnhancedDependencies()) {
      sentence.set(EnhancedDependenciesAnnotation.class,
          fromProto(proto.getEnhancedDependencies(), tokens, null));
    }
    if (proto.hasEnhancedPlusPlusDependencies()) {
      sentence.set(EnhancedPlusPlusDependenciesAnnotation.class,
          fromProto(proto.getEnhancedPlusPlusDependencies(), tokens, null));
    }

    // Entailed sentences, resolved against the collapsed dependencies
    if (proto.getEntailedSentenceCount() > 0) {
      final List<SentenceFragment> entailed = proto.getEntailedSentenceList().stream()
          .map(fragment -> fromProto(fragment, sentence.get(CollapsedDependenciesAnnotation.class)))
          .collect(Collectors.toList());
      sentence.set(NaturalLogicAnnotations.EntailedSentencesAnnotation.class, entailed);
    }
    // Entailed clauses, resolved the same way
    if (proto.getEntailedClauseCount() > 0) {
      final List<SentenceFragment> clauses = proto.getEntailedClauseList().stream()
          .map(fragment -> fromProto(fragment, sentence.get(CollapsedDependenciesAnnotation.class)))
          .collect(Collectors.toList());
      sentence.set(NaturalLogicAnnotations.EntailedClausesAnnotation.class, clauses);
    }

    // Relation triples cannot be restored by this method
    if (proto.getOpenieTripleCount() > 0) {
      throw new IllegalStateException("Cannot deserialize OpenIE triples with this method!");
    }
    if (proto.getKbpTripleCount() > 0) {
      throw new IllegalStateException("Cannot deserialize KBP triples with this method!");
    }

    // Chinese characters
    if (proto.getCharacterCount() > 0) {
      final List<CoreLabel> characters = proto.getCharacterList().stream()
          .map(this::fromProto)
          .collect(Collectors.toList());
      sentence.set(SegmenterCoreAnnotations.CharactersAnnotation.class, characters);
    }

    // The text is normally populated from the Document, so recover it from the tokens here
    sentence.set(TextAnnotation.class, recoverOriginalText(tokens, proto));

    return sentence;
  }

  /**
   * Build a sentence-level CoreMap from its protocol buffer, without attaching any tokens.
   * The result is deliberately lossy: the tokens (and anything that depends on them) are left out
   * and are expected to be filled in later from the enclosing document.
   *
   * @param proto The serialized sentence to read from.
   * @return A CoreMap holding the token offsets plus whichever optional fields the proto carries
   *         (sentence index, character offsets, parse trees, sentiment, relation/entity mentions,
   *         coref mentions).
   */
  protected CoreMap fromProtoNoTokens(CoreNLPProtos.Sentence proto) {
    if (Thread.interrupted()) {
      throw new RuntimeInterruptedException();
    }
    final CoreMap sentence = new ArrayCoreMap();

    // Token offsets are copied unconditionally
    sentence.set(TokenBeginAnnotation.class, proto.getTokenOffsetBegin());
    sentence.set(TokenEndAnnotation.class, proto.getTokenOffsetEnd());

    // Everything below is only copied when the proto actually carries it
    if (proto.hasSentenceIndex()) {
      sentence.set(SentenceIndexAnnotation.class, proto.getSentenceIndex());
    }
    if (proto.hasCharacterOffsetBegin()) {
      sentence.set(CharacterOffsetBeginAnnotation.class, proto.getCharacterOffsetBegin());
    }
    if (proto.hasCharacterOffsetEnd()) {
      sentence.set(CharacterOffsetEndAnnotation.class, proto.getCharacterOffsetEnd());
    }
    if (proto.hasParseTree()) {
      sentence.set(TreeAnnotation.class, fromProto(proto.getParseTree()));
    }
    if (proto.hasBinarizedParseTree()) {
      sentence.set(BinarizedTreeAnnotation.class, fromProto(proto.getBinarizedParseTree()));
    }
    if (proto.getKBestParseTreesCount() > 0) {
      List<Tree> kBestTrees = proto.getKBestParseTreesList().stream()
          .map(parseTree -> fromProto(parseTree))
          .collect(Collectors.toCollection(LinkedList::new));
      sentence.set(KBestTreesAnnotation.class, kBestTrees);
    }
    if (proto.hasAnnotatedParseTree()) {
      sentence.set(SentimentCoreAnnotations.SentimentAnnotatedTree.class, fromProto(proto.getAnnotatedParseTree()));
    }
    if (proto.hasSentiment()) {
      sentence.set(SentimentCoreAnnotations.SentimentClass.class, proto.getSentiment());
    }

    // Entity and relation mentions are restored only when the flag is both present and true
    if (proto.hasHasRelationAnnotations() && proto.getHasRelationAnnotations()) {
      List<EntityMention> entityMentions = proto.getEntityList().stream()
          .map(entityProto -> fromProto(entityProto, sentence))
          .collect(Collectors.toList());
      sentence.set(EntityMentionsAnnotation.class, entityMentions);

      List<RelationMention> relationMentions = proto.getRelationList().stream()
          .map(relationProto -> fromProto(relationProto, sentence))
          .collect(Collectors.toList());
      sentence.set(RelationMentionsAnnotation.class, relationMentions);
    }

    // Attach any coref mentions stored with this sentence
    loadSentenceMentions(proto, sentence);

    return sentence;
  }


  /**
   * Restore the coref {@link Mention}s stored on a serialized sentence and attach them to the
   * sentence's {@link CorefMentionsAnnotation}.
   * Mentions are created first, then the mention-to-mention sets (appositions, predicate nominatives,
   * relative pronouns, list members, containing lists) are rebuilt by looking the stored ids up among
   * the mentions of this same sentence.
   *
   * @param proto The serialized sentence holding the mentions.
   * @param sentence The sentence CoreMap to add the mentions to.
   */
  protected void loadSentenceMentions(CoreNLPProtos.Sentence proto, CoreMap sentence) {
    // An (initially empty) mention list is created whenever the proto says the annotation existed
    if (proto.getHasCorefMentionsAnnotation()) {
      sentence.set(CorefMentionsAnnotation.class, new ArrayList<>());
    }
    if (proto.getMentionsForCorefList().isEmpty()) {
      return;
    }

    // Pass 1: materialize every mention and index it by its mention id
    HashMap<Integer, Mention> mentionsById = new HashMap<>();
    List<Mention> mentionsInSentence = sentence.get(CorefMentionsAnnotation.class);
    for (CoreNLPProtos.Mention serialized : proto.getMentionsForCorefList()) {
      Mention restored = fromProtoNoTokens(serialized);
      mentionsInSentence.add(restored);
      mentionsById.put(restored.mentionID, restored);
    }

    // Pass 2: now that all mentions exist, resolve the id lists into sets of mentions.
    // A set is only assigned when the corresponding id list is non-empty.
    for (CoreNLPProtos.Mention serialized : proto.getMentionsForCorefList()) {
      Mention restored = mentionsById.get(serialized.getMentionID());
      if (!serialized.getAppositionsList().isEmpty()) {
        restored.appositions = resolveMentionIds(serialized.getAppositionsList(), mentionsById);
      }
      if (!serialized.getPredicateNominativesList().isEmpty()) {
        restored.predicateNominatives = resolveMentionIds(serialized.getPredicateNominativesList(), mentionsById);
      }
      if (!serialized.getRelativePronounsList().isEmpty()) {
        restored.relativePronouns = resolveMentionIds(serialized.getRelativePronounsList(), mentionsById);
      }
      if (!serialized.getListMembersList().isEmpty()) {
        restored.listMembers = resolveMentionIds(serialized.getListMembersList(), mentionsById);
      }
      if (!serialized.getBelongToListsList().isEmpty()) {
        restored.belongToLists = resolveMentionIds(serialized.getBelongToListsList(), mentionsById);
      }
    }
  }

  /** Maps each id to its mention (null when the id is unknown) and gathers the results in a new set. */
  private static HashSet<Mention> resolveMentionIds(Collection<?> ids, Map<Integer, Mention> mentionsById) {
    HashSet<Mention> resolved = new HashSet<>();
    for (Object id : ids) {
      resolved.add(mentionsById.get(id));
    }
    return resolved;
  }

  /**
   * Returns a complete document, intended to mimic a document passed as input to
   * {@link ProtobufAnnotationSerializer#toProto(Annotation)} as closely as possible.
   * That is, most common fields are serialized, but there is no guarantee that custom additions
   * will be saved and retrieved.
   *
   * <p>The document is rebuilt in stages: text and characters; the document-level token list;
   * the per-sentence maps; document metadata (docid, date, calendar); coref chains; everything in a
   * sentence that needs tokens or the document (dependency graphs, entailments, OpenIE triples,
   * coref mention details); and finally quotes, NER mentions, speaker info and sections.</p>
   *
   * @param proto The protocol buffer to read the document from.
   * @return An Annotation corresponding to the read protobuf.
   * @throws RuntimeInterruptedException If the calling thread was interrupted.
   */
  @SuppressWarnings("deprecation")
  public Annotation fromProto(CoreNLPProtos.Document proto) {
    if (Thread.interrupted()) {
      throw new RuntimeInterruptedException();
    }
    // Set text
    Annotation ann = new Annotation(proto.getText());

    // if there are characters, add characters
    if (proto.getCharacterCount() > 0) {
      List<CoreLabel> docChars = new ArrayList<>();
      for (CoreNLPProtos.Token c : proto.getCharacterList()) {
        docChars.add(fromProto(c));
      }
      ann.set(SegmenterCoreAnnotations.CharactersAnnotation.class, docChars);
    }

    // Add tokens
    List<CoreLabel> tokens = new ArrayList<>();
    if (proto.getSentenceCount() > 0) {
      // Populate the tokens from the sentence
      for (CoreNLPProtos.Sentence sentence : proto.getSentenceList()) {
        // It's conceivable that the sentences are not contiguous -- pad this with nulls
        while (sentence.hasTokenOffsetBegin() && tokens.size() < sentence.getTokenOffsetBegin()) {
          tokens.add(null);
        }
        // Read the sentence
        for (CoreNLPProtos.Token token : sentence.getTokenList()) {
          CoreLabel coreLabel = fromProto(token);
          // Set docid
          if (proto.hasDocID()) { coreLabel.setDocID(proto.getDocID()); }
          if (token.hasTokenBeginIndex() && token.hasTokenEndIndex()) {
            // This is usually true, if enough annotators are defined
            // Make sure the list is long enough to hold the whole sentence before indexing into it
            while (tokens.size() < sentence.getTokenOffsetEnd()) {
              tokens.add(null);
            }
            // Fill every slot covered by this token. (Previously the loop wrote to the begin index
            // on every iteration, leaving the remaining slots of a multi-index token null.)
            for (int i = token.getTokenBeginIndex(); i < token.getTokenEndIndex(); ++i) {
              tokens.set(i, coreLabel);
            }
          } else {
            // Assume this token spans a single token, and just add it to the tokens list
            tokens.add(coreLabel);
          }
        }
      }
    } else if (proto.getSentencelessTokenCount() > 0) {
      // Eek -- no sentences. Try to recover tokens directly
      for (CoreNLPProtos.Token token : proto.getSentencelessTokenList()) {
        CoreLabel coreLabel = fromProto(token);
        // Set docid
        if (proto.hasDocID()) { coreLabel.setDocID(proto.getDocID()); }
        tokens.add(coreLabel);
      }
    }
    if (!tokens.isEmpty()) { ann.set(TokensAnnotation.class, tokens); }

    // Add sentences
    List<CoreMap> sentences = new ArrayList<>(proto.getSentenceCount());
    for (int sentIndex = 0; sentIndex < proto.getSentenceCount(); ++sentIndex) {
      CoreNLPProtos.Sentence sentence = proto.getSentence(sentIndex);
      CoreMap map = fromProtoNoTokens(sentence);
      if (!tokens.isEmpty() && sentence.hasTokenOffsetBegin() && sentence.hasTokenOffsetEnd() &&
          map.get(TokensAnnotation.class) == null) {
        // Set tokens for sentence (a view onto the document-level token list)
        int tokenBegin = sentence.getTokenOffsetBegin();
        int tokenEnd = sentence.getTokenOffsetEnd();
        assert tokenBegin <= tokens.size() && tokenBegin <= tokenEnd;
        assert tokenEnd <= tokens.size();
        map.set(TokensAnnotation.class, tokens.subList(tokenBegin, tokenEnd));
        // Set sentence index + token index + paragraph index
        for (int i = tokenBegin; i < tokenEnd; ++i) {
          tokens.get(i).setSentIndex(sentIndex);
          // Token indices within a sentence are 1-based
          tokens.get(i).setIndex(i - sentence.getTokenOffsetBegin() + 1);
          if (sentence.hasParagraph()) { tokens.get(i).set(ParagraphAnnotation.class, sentence.getParagraph()); }
        }
        // Set text
        int characterBegin = sentence.getCharacterOffsetBegin();
        int characterEnd = sentence.getCharacterOffsetEnd();
        if (characterEnd <= proto.getText().length()) {
          // The usual case -- get the text from the document text
          map.set(TextAnnotation.class, proto.getText().substring(characterBegin, characterEnd));
        } else {
          // The document text is wrong -- guess the text from the tokens
          map.set(TextAnnotation.class, recoverOriginalText(tokens.subList(tokenBegin, tokenEnd), sentence));
        }
      }
      // End iteration
      sentences.add(map);
    }
    if (!sentences.isEmpty()) { ann.set(SentencesAnnotation.class, sentences); }

    // Set DocID
    String docid = null;
    if (proto.hasDocID()) {
      docid = proto.getDocID();
      ann.set(DocIDAnnotation.class, docid);
    }
    // Set reference time
    if (proto.hasDocDate()) {
      ann.set(DocDateAnnotation.class, proto.getDocDate());
    }
    if (proto.hasCalendar()) {
      GregorianCalendar calendar = new GregorianCalendar();
      calendar.setTimeInMillis(proto.getCalendar());
      ann.set(CalendarAnnotation.class, calendar);
    }

    // Set coref chain
    Map<Integer, CorefChain> corefChains = new HashMap<>();
    for (CoreNLPProtos.CorefChain chainProto : proto.getCorefChainList()) {
      CorefChain chain = fromProto(chainProto, ann);
      corefChains.put(chain.getChainID(), chain);
    }
    if (!corefChains.isEmpty()) { ann.set(CorefChainAnnotation.class, corefChains); }

    // hashes to access Mentions , later in this method need to add speakerInfo to Mention
    // so we need to create id -> Mention, CoreNLPProtos.Mention maps to do this, since SpeakerInfo could reference
    // any Mention in doc
    HashMap<Integer, Mention> idToMention = new HashMap<>();
    HashMap<Integer, CoreNLPProtos.Mention> idToProtoMention = new HashMap<>();

    // Set things in the sentence that need a document context.
    for (int sentenceIndex = 0; sentenceIndex < proto.getSentenceCount(); ++sentenceIndex) {
      CoreNLPProtos.Sentence sentence = proto.getSentenceList().get(sentenceIndex);
      CoreMap map = sentences.get(sentenceIndex);
      List<CoreLabel> sentenceTokens = map.get(TokensAnnotation.class);
      // Set dependency graphs
      if (sentence.hasBasicDependencies()) {
        map.set(BasicDependenciesAnnotation.class, fromProto(sentence.getBasicDependencies(), sentenceTokens, docid));
      }
      if (sentence.hasCollapsedDependencies()) {
        map.set(CollapsedDependenciesAnnotation.class, fromProto(sentence.getCollapsedDependencies(), sentenceTokens, docid));
      }
      if (sentence.hasCollapsedCCProcessedDependencies()) {
        map.set(CollapsedCCProcessedDependenciesAnnotation.class, fromProto(sentence.getCollapsedCCProcessedDependencies(), sentenceTokens, docid));
      }
      if (sentence.hasAlternativeDependencies()) {
        map.set(AlternativeDependenciesAnnotation.class, fromProto(sentence.getAlternativeDependencies(), sentenceTokens, docid));
      }
      if (sentence.hasEnhancedDependencies()) {
        map.set(EnhancedDependenciesAnnotation.class, fromProto(sentence.getEnhancedDependencies(), sentenceTokens, docid));
      }
      if (sentence.hasEnhancedPlusPlusDependencies()) {
        map.set(EnhancedPlusPlusDependenciesAnnotation.class, fromProto(sentence.getEnhancedPlusPlusDependencies(), sentenceTokens, docid));
      }
      // Set entailed sentences
      // NOTE(review): entailed sentences are rebuilt over the enhanced++ graph while entailed clauses
      // (below) use the collapsed graph -- confirm this asymmetry is intended.
      if (sentence.getEntailedSentenceCount() > 0) {
        Set<SentenceFragment> entailedSentences = sentence.getEntailedSentenceList().stream().map(frag -> fromProto(frag, map.get(EnhancedPlusPlusDependenciesAnnotation.class))).collect(Collectors.toSet());
        map.set(NaturalLogicAnnotations.EntailedSentencesAnnotation.class, entailedSentences);
      }
      if (sentence.getEntailedClauseCount() > 0) {
        Set<SentenceFragment> entailedClauses = sentence.getEntailedClauseList().stream().map(frag -> fromProto(frag, map.get(CollapsedDependenciesAnnotation.class))).collect(Collectors.toSet());
        map.set(NaturalLogicAnnotations.EntailedClausesAnnotation.class, entailedClauses);
      }
      // Set relation triples
      if (sentence.getOpenieTripleCount() > 0) {
        List<RelationTriple> triples = new ArrayList<>();
        for (CoreNLPProtos.RelationTriple triple : sentence.getOpenieTripleList()) {
          triples.add(fromProto(triple, ann, sentenceIndex));
        }
        map.set(NaturalLogicAnnotations.RelationTriplesAnnotation.class, triples);
      }
      // Redo some light annotation
      // (numerized tokens are recomputed unless the proto explicitly says they were absent)
      if ( map.containsKey(TokensAnnotation.class) &&
          (!sentence.hasHasNumerizedTokensAnnotation() || sentence.getHasNumerizedTokensAnnotation())) {
        map.set(NumerizedTokensAnnotation.class, NumberNormalizer.findAndMergeNumbers(map));
      }
      // add the CoreLabel and IndexedWord info to each mention
      // when Mentions are serialized, just storing the index in the sentence for CoreLabels and IndexedWords
      // this is the point where the de-serialized sentence has tokens
      int mentionInt = 0;
      for (CoreNLPProtos.Mention protoMention : sentence.getMentionsForCorefList()) {
        // get the mention (same position in the sentence's mention list as in the proto)
        Mention mentionToUpdate = map.get(CorefMentionsAnnotation.class).get(mentionInt);
        // store these in hash for more processing later in this method
        idToMention.put(mentionToUpdate.mentionID, mentionToUpdate);
        idToProtoMention.put(mentionToUpdate.mentionID, protoMention);
        // update the values; a negative token index means the field was not set
        int headIndexedWordIndex = protoMention.getHeadIndexedWord().getTokenIndex();
        if (headIndexedWordIndex >= 0) {
          mentionToUpdate.headIndexedWord = new IndexedWord(sentenceTokens.get(headIndexedWordIndex));
          mentionToUpdate.headIndexedWord.setCopyCount(protoMention.getHeadIndexedWord().getCopyCount());
        }
        int dependingVerbIndex = protoMention.getDependingVerb().getTokenIndex();
        if (dependingVerbIndex >= 0) {
          mentionToUpdate.dependingVerb = new IndexedWord(sentenceTokens.get(dependingVerbIndex));
          mentionToUpdate.dependingVerb.setCopyCount(protoMention.getDependingVerb().getCopyCount());
        }
        int headWordIndex = protoMention.getHeadWord().getTokenIndex();
        if (headWordIndex >= 0) {
          mentionToUpdate.headWord = sentenceTokens.get(headWordIndex);
        }
        mentionToUpdate.sentenceWords = new ArrayList<>();
        for (CoreNLPProtos.IndexedWord clp : protoMention.getSentenceWordsList()) {
          int ti = clp.getTokenIndex();
          mentionToUpdate.sentenceWords.add(sentenceTokens.get(ti));
        }
        mentionToUpdate.originalSpan = new ArrayList<>();
        for (CoreNLPProtos.IndexedWord clp : protoMention.getOriginalSpanList()) {
          int ti = clp.getTokenIndex();
          mentionToUpdate.originalSpan.add(sentenceTokens.get(ti));
        }
        // Graphs and trees are shared with the sentence rather than copied
        if (protoMention.getHasBasicDependency()) {
          mentionToUpdate.basicDependency = map.get(BasicDependenciesAnnotation.class);
        }
        if (protoMention.getHasEnhancedDepenedncy()) {
          mentionToUpdate.enhancedDependency = map.get(EnhancedDependenciesAnnotation.class);
        }
        if (protoMention.getHasContextParseTree()) {
          mentionToUpdate.contextParseTree = map.get(TreeAnnotation.class);
        }
        // move on to next mention
        mentionInt++;
      }

    }

    // Set quotes
    List<CoreMap> quotes = proto.getQuoteList().stream().map(quote -> fromProto(quote, tokens)).collect(Collectors.toList());
    if (!quotes.isEmpty()) {
      ann.set(QuotationsAnnotation.class, quotes);
    }

    // Set NERmention
    List<CoreMap> mentions = proto.getMentionsList().stream().map(this::fromProto).collect(Collectors.toList());
    if (!mentions.isEmpty()) {
      ann.set(MentionsAnnotation.class, mentions);
    }

    // add SpeakerInfo stuff to Mentions, this requires knowing all mentions in the document
    // also add all the Set<Mention>
    for (int mentionID : idToMention.keySet()) {
      // this is the Mention message corresponding to this Mention
      Mention mentionToUpdate = idToMention.get(mentionID);
      CoreNLPProtos.Mention correspondingProtoMention = idToProtoMention.get(mentionID);
      if (!correspondingProtoMention.hasSpeakerInfo()) {
        // keep speakerInfo null for this Mention if it didn't store a speakerInfo
        // so just continue to next Mention
        continue;
      }
      // if we're here we know a speakerInfo was stored
      SpeakerInfo speakerInfo = fromProto(correspondingProtoMention.getSpeakerInfo());
      // go through all ids stored for the speakerInfo in its mentions list, and get the Mention
      // Mentions are stored by MentionID , MentionID should be set by MentionAnnotator
      // MentionID is ID in document, 0, 1, 2, etc...
      for (int speakerInfoMentionID : correspondingProtoMention.getSpeakerInfo().getMentionsList()) {
        speakerInfo.addMention(idToMention.get(speakerInfoMentionID));
      }
      // now the SpeakerInfo for this Mention should be fully restored
      mentionToUpdate.speakerInfo = speakerInfo;
    }

    // add section info (the annotation is always set, even when there are no sections)
    ann.set(SectionsAnnotation.class, new ArrayList<CoreMap>());
    for (CoreNLPProtos.Section section : proto.getSectionsList()) {
      ann.get(SectionsAnnotation.class).add(fromProto(section, ann.get(SentencesAnnotation.class)));
    }
    // Return
    return ann;
  }

  /**
   * Rebuild a {@link Tree} from its serialized form, recursing into the children.
   * Although mainly a building block for sentence deserialization, the conversion is lossless and
   * therefore safe to call directly, which is why it is public.
   *
   * @param proto The serialized tree.
   * @return The deserialized tree; every node is a {@link LabeledScoredTreeNode}.
   */
  public Tree fromProto(CoreNLPProtos.ParseTree proto) {
    if (Thread.interrupted()) {
      throw new RuntimeInterruptedException();
    }
    final LabeledScoredTreeNode tree = new LabeledScoredTreeNode();

    // A label is only created when the node has a value; span and sentiment hang off that label
    if (proto.hasValue()) {
      final CoreLabel label = new CoreLabel();
      label.setCategory(proto.getValue());
      label.setValue(proto.getValue());
      tree.setLabel(label);
      if (proto.hasYieldBeginIndex() && proto.hasYieldEndIndex()) {
        label.set(SpanAnnotation.class, new IntPair(proto.getYieldBeginIndex(), proto.getYieldEndIndex()));
      }
      if (proto.hasSentiment()) {
        label.set(RNNCoreAnnotations.PredictedClass.class, proto.getSentiment().getNumber());
      }
    }

    if (proto.hasScore()) {
      tree.setScore(proto.getScore());
    }

    // Deserialize the subtrees in order
    final int childCount = proto.getChildCount();
    Tree[] subtrees = new LabeledScoredTreeNode[childCount];
    for (int childIndex = 0; childIndex < childCount; childIndex++) {
      subtrees[childIndex] = fromProto(proto.getChild(childIndex));
    }
    tree.setChildren(subtrees);

    return tree;
  }

  /**
   * Map a serialized (Protobuf) language onto the equally named CoreNLP {@link Language}.
   *
   * @param lang The Protobuf language value.
   * @return The CoreNLP language with the same name.
   * @throws IllegalStateException If the Protobuf value has no counterpart handled here.
   */
  public static Language fromProto(CoreNLPProtos.Language lang) {
    switch (lang) {
      case Any:              return Language.Any;
      case Arabic:           return Language.Arabic;
      case Chinese:          return Language.Chinese;
      case English:          return Language.English;
      case French:           return Language.French;
      case German:           return Language.German;
      case Hebrew:           return Language.Hebrew;
      case Spanish:          return Language.Spanish;
      case UniversalChinese: return Language.UniversalChinese;
      case UniversalEnglish: return Language.UniversalEnglish;
      case Unknown:          return Language.Unknown;
      default:
        throw new IllegalStateException("Unknown language: " + lang);
    }
  }

  /**
   * Return a CoreNLP Operator (Natural Logic operator) from a Protobuf operator.
   * The operator is found by matching the serialized name against the names of the
   * {@link Operator} constants, ignoring case. The comparison is locale-independent, so the
   * result does not depend on the JVM's default locale.
   *
   * @param operator The serialized operator, carrying its name and the quantifier, subject and
   *                 object span boundaries.
   * @return The operator spec with the spans copied from the proto. If no {@link Operator}
   *         constant matches the serialized name, the spec is created with a null operator.
   */
  public static OperatorSpec fromProto(CoreNLPProtos.Operator operator) {
    String opName = operator.getName();
    Operator op = null;
    for (Operator candidate : Operator.values()) {
      // equalsIgnoreCase avoids default-locale case mapping (e.g. Turkish dotless i)
      if (candidate.name().equalsIgnoreCase(opName)) {
        op = candidate;
        break;
      }
    }
    return new OperatorSpec(op, operator.getQuantifierSpanBegin(), operator.getQuantifierSpanEnd(),
        operator.getSubjectSpanBegin(), operator.getSubjectSpanEnd(),
        operator.getObjectSpanBegin(), operator.getObjectSpanEnd());
  }

  /**
   * Return a CoreNLP Polarity (Natural Logic polarity) from a Protobuf polarity.
   * The seven projected relations are packed, in a fixed order, into the byte array that
   * the {@link Polarity} constructor takes.
   *
   * @param polarity The serialized polarity.
   * @return The polarity built from the serialized projection function.
   */
  public static Polarity fromProto(CoreNLPProtos.Polarity polarity) {
    // Slot order: equivalence, forward entailment, reverse entailment, negation,
    // alternation, cover, independence
    byte[] projections = {
        (byte) polarity.getProjectEquivalence().getNumber(),
        (byte) polarity.getProjectForwardEntailment().getNumber(),
        (byte) polarity.getProjectReverseEntailment().getNumber(),
        (byte) polarity.getProjectNegation().getNumber(),
        (byte) polarity.getProjectAlternation().getNumber(),
        (byte) polarity.getProjectCover().getNumber(),
        (byte) polarity.getProjectIndependence().getNumber()
    };
    return new Polarity(projections);
  }


  /**
   * Deserialize a dependency tree, allowing for cross-sentence arcs.
   * This is primarily here for deserializing OpenIE triples.
   *
   * @param proto The serialized dependency graph.
   * @param sentence The tokens of the sentence the graph belongs to; used to look up each node's token
   *                 when no document is given.
   * @param docid The docid to put on nodes whose token has none; may be null.
   * @param document If present, each node's token is looked up in this document through the node's
   *                 own sentence index instead of in {@code sentence}.
   * @return The semantic graph rebuilt over the referenced tokens.
   *
   * @see ProtobufAnnotationSerializer#fromProto(CoreNLPProtos.DependencyGraph, List, String)
   */
  private static SemanticGraph fromProto(CoreNLPProtos.DependencyGraph proto, List<CoreLabel> sentence, String docid, Optional<Annotation> document) {
    SemanticGraph graph = new SemanticGraph();

    // first construct the actual nodes; keep them indexed by (token index, copy count)
    // so that edges and roots can find them again.
    // This block is one of the places which take noticeable time in datum caching,
    // so it makes a single pass over the node list.
    TwoDimensionalMap<Integer, Integer, IndexedWord> nodes = TwoDimensionalMap.hashMap();
    for(CoreNLPProtos.DependencyGraph.Node in: proto.getNodeList()){
      CoreLabel token;
      if (document.isPresent()) {
        token = document.get().get(SentencesAnnotation.class).get(in.getSentenceIndex()).get(TokensAnnotation.class).get(in.getIndex() - 1); // token index starts at 1!
      } else {
        token = sentence.get(in.getIndex() - 1); // index starts at 1!
      }
      IndexedWord word;
      if (in.hasCopyAnnotation() && in.getCopyAnnotation() > 0) {
        // Copy nodes get their own CoreLabel so the copy count does not leak onto the original token
        // TODO: if we make a copy wrapper CoreLabel, use it here instead
        word = new IndexedWord(new CoreLabel(token));
        word.setCopyCount(in.getCopyAnnotation());
      } else {
        word = new IndexedWord(token);
      }

      // for backwards compatibility - new annotations should have
      // these fields set, but annotations older than August 2014 might not
      if (word.docID() == null && docid != null) {
        word.setDocID(docid);
      }
      if (word.sentIndex() < 0 && in.getSentenceIndex() >= 0) {
        word.setSentIndex(in.getSentenceIndex());
      }
      if (word.index() < 0 && in.getIndex() >= 0) {
        word.setIndex(in.getIndex());
      }

      assert in.getIndex() == word.index();
      nodes.put(in.getIndex(), in.getCopyAnnotation(), word);
      graph.addVertex(word);
    }

    // add all edges to the actual graph
    for(CoreNLPProtos.DependencyGraph.Edge ie: proto.getEdgeList()){
      IndexedWord source = nodes.get(ie.getSource(), ie.getSourceCopy());
      assert(source != null);
      IndexedWord target = nodes.get(ie.getTarget(), ie.getTargetCopy());
      assert(target != null);
      synchronized (globalLock) {
        // this is not thread-safe: there are static fields in GrammaticalRelation
        assert ie.hasDep();
        GrammaticalRelation rel = GrammaticalRelation.valueOf(fromProto(ie.getLanguage()), ie.getDep());
        graph.addEdge(source, target, rel, 1.0, ie.hasIsExtra() && ie.getIsExtra());
      }
    }

    if (proto.getRootCount() > 0) {
      // Roots are stored by token index and always refer to the non-copy node (copy count 0)
      Collection<IndexedWord> roots = proto.getRootList().stream().map(rootI -> nodes.get(rootI, 0)).collect(Collectors.toList());
      graph.setRoots(roots);
    } else {
      // Roots were not saved away
      // compute root nodes if non-empty
      if(!graph.isEmpty()){
        graph.resetRoots();
      }
    }
    return graph;

  }


  /**
   * Convert a serialized dependency graph into a {@link SemanticGraph} over a single sentence.
   * This method is intended to be called only from the
   * {@link ProtobufAnnotationSerializer#fromProto(CoreNLPProtos.Document)} method.
   *
   * @param proto The serialized representation of the graph. Its nodes refer to tokens by index, so it
   *              cannot be read without the tokens.
   * @param sentence The tokens of the sentence the graph was saved from; they are not part of the
   *                 serialized representation and so must be supplied.
   * @param docid The docid to stamp on nodes that lack one, as it is not saved with the graph;
   *              may be null, in which case no docid is filled in.
   * @return A semantic graph corresponding to the saved object, on the provided sentence.
   */
  public static SemanticGraph fromProto(CoreNLPProtos.DependencyGraph proto, List<CoreLabel> sentence, String docid) {
    // No enclosing document: every node is resolved against the given sentence tokens
    final Optional<Annotation> noDocument = Optional.empty();
    return fromProto(proto, sentence, docid, noDocument);
  }


  /**
   * Rebuild a {@link RelationTriple} from its serialized representation.
   * Both a document and a sentence index are needed:
   * the document supplies the tokens that the subject, relation and object spans point to (and the
   * docid used when rebuilding the dependency tree), and the sentence index selects the sentence
   * whose tokens back that tree.
   *
   * @param proto The serialized relation triple.
   * @param doc The document we are deserializing. This document should already
   *            have a docid annotation set, if there is one.
   * @param sentenceIndex The index of the sentence this extraction should be attached to.
   *
   * @return A relation triple as a Java object, corresponding to the serialized proto.
   */
  public static RelationTriple fromProto(CoreNLPProtos.RelationTriple proto, Annotation doc, int sentenceIndex) {
    if (Thread.interrupted()) {
      throw new RuntimeInterruptedException();
    }

    // Resolve each stored (sentence index, token index) location to the actual token
    List<CoreLabel> subject = proto.getSubjectTokensList().stream()
        .map(where -> doc.get(SentencesAnnotation.class)
            .get(where.getSentenceIndex())
            .get(TokensAnnotation.class)
            .get(where.getTokenIndex()))
        .collect(Collectors.toList());

    List<CoreLabel> relation;
    if (proto.getRelationTokensCount() > 0) {
      relation = proto.getRelationTokensList().stream()
          .map(where -> doc.get(SentencesAnnotation.class)
              .get(where.getSentenceIndex())
              .get(TokensAnnotation.class)
              .get(where.getTokenIndex()))
          .collect(Collectors.toList());
    } else {
      // No real span was saved for the relation, so stand in a dummy word carrying its text
      relation = Collections.singletonList(new CoreLabel(new Word(proto.getRelation())));
    }

    List<CoreLabel> object = proto.getObjectTokensList().stream()
        .map(where -> doc.get(SentencesAnnotation.class)
            .get(where.getSentenceIndex())
            .get(TokensAnnotation.class)
            .get(where.getTokenIndex()))
        .collect(Collectors.toList());

    // Build the triple, with its dependency tree when one was serialized
    final double confidence = proto.getConfidence();
    final RelationTriple extraction;
    if (!proto.hasTree()) {
      extraction = new RelationTriple(subject, relation, object, confidence);
    } else {
      SemanticGraph tree = fromProto(
          proto.getTree(),
          doc.get(SentencesAnnotation.class).get(sentenceIndex).get(TokensAnnotation.class),
          doc.get(DocIDAnnotation.class),
          Optional.of(doc));
      extraction = new RelationTriple.WithTree(subject, relation, object, tree, confidence);
    }

    // Carry over the optional flags that were saved
    if (proto.hasIstmod()) {
      extraction.istmod(proto.getIstmod());
    }
    if (proto.hasPrefixBe()) {
      extraction.isPrefixBe(proto.getPrefixBe());
    }
    if (proto.hasSuffixBe()) {
      extraction.isSuffixBe(proto.getSuffixBe());
    }
    if (proto.hasSuffixOf()) {
      extraction.isSuffixOf(proto.getSuffixOf());
    }

    return extraction;
  }

  /**
   * Returns a sentence fragment from a given protocol buffer, and an associated dependency graph.
   * The fragment's graph is a copy of {@code tree} that is re-rooted (if the proto names a root) and
   * pruned down to the token indices stored in the proto; {@code tree} itself is only read.
   *
   * @param fragment The saved sentence fragment.
   * @param tree The dependency graph for the whole sentence.
   *
   * @return A {@link SentenceFragment} object corresponding to the saved proto. Assumed truth defaults
   *         to true and the score to 1.0 when the proto does not carry them.
   */
  public static SentenceFragment fromProto(CoreNLPProtos.SentenceFragment fragment, SemanticGraph tree) {
    if (Thread.interrupted()) {
      throw new RuntimeInterruptedException();
    }
    // Work on a copy so the sentence's own graph is left untouched
    SemanticGraph fragmentTree = new SemanticGraph(tree);
    // Set the new root
    // (the saved root is compared against vertex.index() - 1, i.e. it is one less than the vertex index)
    if (fragment.hasRoot()) {
      fragmentTree.resetRoots();
      fragmentTree.vertexSet().stream()
          .filter(vertex -> vertex.index() - 1 == fragment.getRoot())
          .forEach(fragmentTree::setRoot);
    }
    // Set the new vertices
    // Iterate over the original graph's vertices while removing from the copy; every vertex whose
    // (index - 1) is not among the saved token indices is dropped.
    Set<Integer> keptIndices = new HashSet<>(fragment.getTokenIndexList());
    tree.vertexSet().stream()
        .filter(vertex -> !keptIndices.contains(vertex.index() - 1))
        .forEach(fragmentTree::removeVertex);
    // Apparently this sometimes screws up the tree
    // Repair step: any remaining vertex that is neither graph's first root and has lost all of its
    // incoming edges is re-attached to the fragment's first root, reusing the relation, weight and
    // extra flag of its first incoming edge in the original graph.
    // NOTE(review): this adds edges to fragmentTree while streaming over fragmentTree.vertexSet() --
    // presumably safe because only edges, not vertices, change; confirm against SemanticGraph.
    fragmentTree.vertexSet().stream()
        .filter(vertex -> fragmentTree.getFirstRoot() != vertex &&
            tree.getFirstRoot() != vertex &&
            !fragmentTree.incomingEdgeIterable(vertex).iterator().hasNext())
        .forEach(vertex -> {
          SemanticGraphEdge edge = tree.incomingEdgeIterable(vertex).iterator().next();
          fragmentTree.addEdge(fragmentTree.getFirstRoot(), edge.getDependent(), edge.getRelation(),
              edge.getWeight(), edge.isExtra());
        });
    // Return the fragment
    //noinspection SimplifiableConditionalExpression
    return new SentenceFragment(fragmentTree,
        fragment.hasAssumedTruth() ? fragment.getAssumedTruth() : true,
        false)
        .changeScore(fragment.hasScore() ? fragment.getScore() : 1.0);
  }

  /**
   * Turn a serialized String-to-String map back into a Java map.
   * The proto stores keys and values as two parallel lists; entry {@code i} pairs key {@code i}
   * with value {@code i}.
   *
   * @param proto The serialized map.
   *
   * @return A new HashMap with one entry per serialized key (a later duplicate key overwrites an earlier one).
   */
  public static HashMap<String, String> fromProto(CoreNLPProtos.MapStringString proto) {
    HashMap<String, String> result = new HashMap<>();
    int idx = 0;
    while (idx < proto.getKeyCount()) {
      String key = proto.getKey(idx);
      String value = proto.getValue(idx);
      result.put(key, value);
      idx++;
    }
    return result;
  }

  /**
   * Turn a serialized Integer-to-String map back into a Java map.
   * The proto stores keys and values as two parallel lists; entry {@code i} pairs key {@code i}
   * with value {@code i}.
   *
   * @param proto The serialized map.
   *
   * @return A new HashMap with one entry per serialized key (a later duplicate key overwrites an earlier one).
   */
  public static HashMap<Integer, String> fromProto(CoreNLPProtos.MapIntString proto) {
    HashMap<Integer, String> result = new HashMap<>();
    int idx = 0;
    while (idx < proto.getKeyCount()) {
      Integer key = proto.getKey(idx);
      String value = proto.getValue(idx);
      result.put(key, value);
      idx++;
    }
    return result;
  }

  /**
   * Read a CorefChain from its serialized representation.
   * This is private because the proto does not store the mention span strings; they are rebuilt
   * from the tokens of the supplied partial document.
   *
   * <p>The proto stores 0-based sentence and token indices (they are used directly as list indices
   * below), whereas the {@link CorefChain.CorefMention} objects created here use the corresponding
   * 1-based values. The mention map is keyed by the same 1-based (sentence number, head index) pair
   * that is stored on each mention.</p>
   *
   * @param proto The serialized representation of the coref chain, missing information on its mention span string.
   * @param partialDocument A partial document, which must contain {@link SentencesAnnotation} and {@link TokensAnnotation} in
   *                        order to fill in the mention span strings.
   * @return A coreference chain.
   * @throws RuntimeInterruptedException If the current thread was interrupted while reading mentions.
   */
  private CorefChain fromProto(CoreNLPProtos.CorefChain proto, Annotation partialDocument) {
    // Get chain ID
    int cid = proto.getChainID();
    // Get mentions
    Map<IntPair, Set<CorefChain.CorefMention>> mentions = new HashMap<>();
    CorefChain.CorefMention representative = null;
    List<CoreMap> sentences = partialDocument.get(SentencesAnnotation.class);
    for (int i = 0; i < proto.getMentionCount(); ++i) {
      if (Thread.interrupted()) {
        throw new RuntimeInterruptedException();
      }
      CoreNLPProtos.CorefChain.CorefMention mentionProto = proto.getMention(i);
      // Rebuild the mention's surface string by joining its tokens with single spaces
      StringBuilder mentionSpan = new StringBuilder();
      List<CoreLabel> sentenceTokens = sentences.get(mentionProto.getSentenceIndex()).get(TokensAnnotation.class);
      for (int k = mentionProto.getBeginIndex(); k < mentionProto.getEndIndex(); ++k) {
        mentionSpan.append(" ").append(sentenceTokens.get(k).word());
      }
      // Shift the 0-based proto indices to the 1-based values stored on the mention
      int sentenceNumber = mentionProto.getSentenceIndex() + 1;
      int headIndex = mentionProto.getHeadIndex() + 1;
      // Create mention
      CorefChain.CorefMention mention = new CorefChain.CorefMention(
          Dictionaries.MentionType.valueOf(mentionProto.getMentionType()),
          Dictionaries.Number.valueOf(mentionProto.getNumber()),
          Dictionaries.Gender.valueOf(mentionProto.getGender()),
          Dictionaries.Animacy.valueOf(mentionProto.getAnimacy()),
          mentionProto.getBeginIndex() + 1,
          mentionProto.getEndIndex() + 1,
          headIndex,
          cid,
          mentionProto.getMentionID(),
          sentenceNumber,
          new IntTuple(new int[]{ sentenceNumber, mentionProto.getPosition() }),
          // Drop the leading separator, if any token was appended
          mentionSpan.substring(mentionSpan.length() > 0 ? 1 : 0));
      // Register mention under the same (sentence number, head index) it carries itself.
      // Subtracting 1 here instead would key the first sentence / first token at -1.
      IntPair key = new IntPair(sentenceNumber, headIndex);
      mentions.computeIfAbsent(key, unused -> new HashSet<>()).add(mention);
      // Check for representative
      if (proto.hasRepresentative() && i == proto.getRepresentative()) {
        representative = mention;
      }
    }
    // Return
    return new CorefChain(cid, mentions, representative);
  }

  /**
   * Rebuild a coref {@link Mention} from the fields stored directly on its proto.
   * Enum and string fields whose serialized value is the empty string are not assigned, and the
   * two collection fields are only assigned when the proto holds at least one element.
   *
   * @param protoMention The serialized mention.
   * @return A new mention populated from the proto.
   */
  private Mention fromProtoNoTokens(CoreNLPProtos.Mention protoMention) {
    Mention mention = new Mention();

    // Enum-valued fields are looked up by name; an absent or empty name leaves the field untouched
    String mentionTypeName = protoMention.getMentionType();
    if (mentionTypeName != null && !mentionTypeName.isEmpty()) {
      mention.mentionType = Dictionaries.MentionType.valueOf(mentionTypeName);
    }
    String numberName = protoMention.getNumber();
    if (numberName != null && !numberName.isEmpty()) {
      mention.number = Dictionaries.Number.valueOf(numberName);
    }
    String genderName = protoMention.getGender();
    if (genderName != null && !genderName.isEmpty()) {
      mention.gender = Dictionaries.Gender.valueOf(genderName);
    }
    String animacyName = protoMention.getAnimacy();
    if (animacyName != null && !animacyName.isEmpty()) {
      mention.animacy = Dictionaries.Animacy.valueOf(animacyName);
    }
    String personName = protoMention.getPerson();
    if (personName != null && !personName.isEmpty()) {
      mention.person = Dictionaries.Person.valueOf(personName);
    }

    // TO DO: a Mention whose original headString was "" cannot be told apart from an unset one,
    // so the empty value is lost on a round trip
    String headString = protoMention.getHeadString();
    if (!headString.isEmpty()) {
      mention.headString = headString;
    }
    // TO DO: same limitation as above for an original nerString of ""
    String nerString = protoMention.getNerString();
    if (!nerString.isEmpty()) {
      mention.nerString = nerString;
    }

    // Positions and identifiers
    mention.startIndex = protoMention.getStartIndex();
    mention.endIndex = protoMention.getEndIndex();
    mention.headIndex = protoMention.getHeadIndex();
    mention.mentionID = protoMention.getMentionID();
    mention.originalRef = protoMention.getOriginalRef();

    // Cluster and document bookkeeping
    mention.goldCorefClusterID = protoMention.getGoldCorefClusterID();
    mention.corefClusterID = protoMention.getCorefClusterID();
    mention.mentionNum = protoMention.getMentionNum();
    mention.sentNum = protoMention.getSentNum();
    mention.utter = protoMention.getUtter();
    mention.paragraph = protoMention.getParagraph();

    // Boolean flags
    mention.isSubject = protoMention.getIsSubject();
    mention.isDirectObject = protoMention.getIsDirectObject();
    mention.isIndirectObject = protoMention.getIsIndirectObject();
    mention.isPrepositionObject = protoMention.getIsPrepositionObject();
    mention.hasTwin = protoMention.getHasTwin();
    mention.generic = protoMention.getGeneric();
    mention.isSingleton = protoMention.getIsSingleton();

    // String collections are only created when the proto actually carries elements
    if (protoMention.getDependentsCount() > 0) {
      mention.dependents = new HashSet<>(protoMention.getDependentsList());
    }
    if (protoMention.getPreprocessedTermsCount() > 0) {
      mention.preprocessedTerms = new ArrayList<>(protoMention.getPreprocessedTermsList());
    }

    return mention;
  }

  /**
   * Rebuild a {@link SpeakerInfo} from its serialized form.
   * Only the speaker name is read from the proto.
   *
   * @param speakerInfo The serialized speaker info.
   * @return A new speaker info constructed from the serialized speaker name.
   */
  private SpeakerInfo fromProto(CoreNLPProtos.SpeakerInfo speakerInfo) {
    return new SpeakerInfo(speakerInfo.getSpeakerName());
  }

  /**
   * Create an internal Timex object from the serialized protocol buffer.
   * String fields missing from the proto are passed on as {@code null}; missing begin / end
   * points are passed on as {@code -1}.
   *
   * @param proto The serialized protocol buffer to read from.
   * @return A timex, with as much information filled in as was gleaned from the protocol buffer.
   */
  private Timex fromProto(CoreNLPProtos.Timex proto) {
    String type = proto.hasType() ? proto.getType() : null;
    String value = proto.hasValue() ? proto.getValue() : null;
    String altValue = proto.hasAltValue() ? proto.getAltValue() : null;
    String tid = proto.hasTid() ? proto.getTid() : null;
    String text = proto.hasText() ? proto.getText() : null;
    int beginPoint = proto.hasBeginPoint() ? proto.getBeginPoint() : -1;
    int endPoint = proto.hasEndPoint() ? proto.getEndPoint() : -1;
    return new Timex(type, value, altValue, tid, text, beginPoint, endPoint);
  }

  /**
   * Read an entity mention from its serialized form. Requires the containing sentence to be
   * passed in along with the protocol buffer.
   * Fields missing from the proto are passed to the constructor as {@code null}; the normalized
   * name, head token position and coref ID are only set when present.
   *
   * @param proto The serialized entity mention.
   * @param sentence The sentence this mention is attached to.
   * @return The entity mention corresponding to the serialized object.
   */
  private EntityMention fromProto(CoreNLPProtos.Entity proto, CoreMap sentence) {
    // Each span is guarded by the presence of its own start field
    Span headSpan = proto.hasHeadStart() ? new Span(proto.getHeadStart(), proto.getHeadEnd()) : null;
    // This span was previously guarded by hasHeadEnd(), so an extent could be dropped (or a bogus
    // default-valued extent created) depending on whether the *head* was serialized
    Span extentSpan = proto.hasExtentStart() ? new Span(proto.getExtentStart(), proto.getExtentEnd()) : null;
    // NOTE(review): the argument order (head span, then extent span) is kept exactly as before;
    // confirm it against the parameter order of the EntityMention constructor.
    EntityMention rtn = new EntityMention(
        proto.hasObjectID() ? proto.getObjectID() : null,
        sentence,
        headSpan,
        extentSpan,
        proto.hasType() ? proto.getType() : null,
        proto.hasSubtype() ? proto.getSubtype() : null,
        proto.hasMentionType() ? proto.getMentionType() : null );
    if (proto.hasNormalizedName()) { rtn.setNormalizedName(proto.getNormalizedName()); }
    if (proto.hasHeadTokenIndex()) { rtn.setHeadTokenPosition(proto.getHeadTokenIndex()); }
    if (proto.hasCorefID()) { rtn.setCorefID(proto.getCorefID()); }
    return rtn;
  }

  /**
   * Read a relation mention from its serialized form. Requires the containing sentence to be
   * passed in along with the protocol buffer.
   * Every serialized argument is itself deserialized against the same sentence. Argument names
   * are set when the proto carries at least one name, or when it has no arguments at all.
   *
   * @param proto The serialized relation mention.
   * @param sentence The sentence this mention is attached to.
   * @return The relation mention corresponding to the serialized object.
   */
  private RelationMention fromProto(CoreNLPProtos.Relation proto, CoreMap sentence) {
    // Deserialize the arguments first
    List<ExtractionObject> args = proto.getArgList()
        .stream()
        .map(arg -> fromProto(arg, sentence))
        .collect(Collectors.toList());

    // Optional fields fall back to null when absent from the proto
    String objectId = proto.hasObjectID() ? proto.getObjectID() : null;
    Span extent = proto.hasExtentStart() ? new Span(proto.getExtentStart(), proto.getExtentEnd()) : null;
    String type = proto.hasType() ? proto.getType() : null;
    String subtype = proto.hasSubtype() ? proto.getSubtype() : null;

    RelationMention relation = new RelationMention(objectId, sentence, extent, type, subtype, args);
    if (proto.hasSignature()) {
      relation.setSignature(proto.getSignature());
    }
    boolean hasArgNames = proto.getArgNameCount() > 0;
    boolean hasNoArgs = proto.getArgCount() == 0;
    if (hasArgNames || hasNoArgs) {
      relation.setArgNames(proto.getArgNameList());
    }
    return relation;
  }

  /**
   * Read a quote annotation from its serialized protocol buffer.
   * Fields missing from the proto are passed to {@link QuoteAnnotator#makeQuote} as {@code null}
   * (strings) or {@code -1} (offsets and indices). The quotation index and the token begin / end
   * annotations are additionally set on the result when present in the proto.
   *
   * @param quote The serialized quote.
   * @param tokens The document tokens; currently not consulted (see the note in the body).
   * @return The annotation representing the quote.
   */
  @SuppressWarnings("UnusedParameters")
  private static Annotation fromProto(CoreNLPProtos.Quote quote, List<CoreLabel> tokens) {
    // note[gabor]: Deliberately left null. Taking tokens.subList(tokenBegin, tokenEnd) when both
    // bounds are present and non-negative works, but apparently isn't the behavior of the quote
    // annotator.
    List<CoreLabel> quotedTokens = null;

    String text = quote.hasText() ? quote.getText() : null;
    int charBegin = quote.hasBegin() ? quote.getBegin() : -1;
    int charEnd = quote.hasEnd() ? quote.getEnd() : -1;
    int tokenOffset = quote.hasTokenBegin() ? quote.getTokenBegin() : -1;
    int sentenceBegin = quote.hasSentenceBegin() ? quote.getSentenceBegin() : -1;
    int sentenceEnd = quote.hasSentenceEnd() ? quote.getSentenceEnd() : -1;
    String docId = quote.hasDocid() ? quote.getDocid() : null;

    @SuppressWarnings("ConstantConditions")
    Annotation quoteAnnotation = QuoteAnnotator.makeQuote(
        text, charBegin, charEnd, quotedTokens, tokenOffset, sentenceBegin, sentenceEnd, docId);

    if (quote.hasIndex()) {
      quoteAnnotation.set(QuotationIndexAnnotation.class, quote.getIndex());
    }
    if (quote.hasTokenBegin()) {
      quoteAnnotation.set(TokenBeginAnnotation.class, quote.getTokenBegin());
    }
    if (quote.hasTokenEnd()) {
      quoteAnnotation.set(TokenEndAnnotation.class, quote.getTokenEnd());
    }
    return quoteAnnotation;
  }

  /**
   * Read a named-entity mention from its serialized protocol buffer.
   * Each field present in the proto is copied onto the corresponding annotation key of a fresh
   * {@link ArrayCoreMap}; absent fields are left unset.
   *
   * @param mention The serialized NER mention.
   * @return A CoreMap carrying the annotations that were present in the proto.
   */
  @SuppressWarnings("UnusedParameters")
  private CoreMap fromProto(CoreNLPProtos.NERMention mention) {
    CoreMap result = new ArrayCoreMap();
    // Location of the mention
    if (mention.hasSentenceIndex()) {
      result.set(SentenceIndexAnnotation.class, mention.getSentenceIndex());
    }
    if (mention.hasTokenStartInSentenceInclusive()) {
      result.set(TokenBeginAnnotation.class, mention.getTokenStartInSentenceInclusive());
    }
    if (mention.hasTokenEndInSentenceExclusive()) {
      result.set(TokenEndAnnotation.class, mention.getTokenEndInSentenceExclusive());
    }
    // Entity labels
    if (mention.hasNer()) {
      result.set(NamedEntityTagAnnotation.class, mention.getNer());
    }
    if (mention.hasNormalizedNER()) {
      result.set(NormalizedNamedEntityTagAnnotation.class, mention.getNormalizedNER());
    }
    if (mention.hasEntityType()) {
      result.set(EntityTypeAnnotation.class, mention.getEntityType());
    }
    // Nested / linked information
    if (mention.hasTimex()) {
      result.set(TimexAnnotation.class, fromProto(mention.getTimex()));
    }
    if (mention.hasWikipediaEntity()) {
      result.set(WikipediaEntityAnnotation.class, mention.getWikipediaEntity());
    }
    return result;
  }

  /**
   * Read a section CoreMap from its serialized form. Requires the already-deserialized sentences
   * of the document, because the proto only stores the indexes of the section's sentences.
   * The character offsets are always set; author and date are set only when present in the proto.
   *
   * @param section The serialized section coremap.
   * @param annotationSentences The sentences of the document, looked up by the sentence indexes
   *                            stored in the section.
   * @return The section CoreMap corresponding to the serialized object.
   */
  private CoreMap fromProto(CoreNLPProtos.Section section, List<CoreMap> annotationSentences) {
    CoreMap sectionMap = new ArrayCoreMap();
    sectionMap.set(CharacterOffsetBeginAnnotation.class, section.getCharBegin());
    sectionMap.set(CharacterOffsetEndAnnotation.class, section.getCharEnd());
    if (section.hasAuthor()) {
      sectionMap.set(AuthorAnnotation.class, section.getAuthor());
    }
    if (section.hasDatetime()) {
      sectionMap.set(SectionDateAnnotation.class, section.getDatetime());
    }
    // Resolve each stored sentence index to the sentence object, preserving the stored order
    ArrayList<CoreMap> sectionSentences = section.getSentenceIndexesList()
        .stream()
        .map(sentenceIndex -> annotationSentences.get(sentenceIndex))
        .collect(Collectors.toCollection(ArrayList::new));
    sectionMap.set(SentencesAnnotation.class, sectionSentences);
    return sectionMap;
  }

  /**
   * Recover the {@link edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation} field of a sentence
   * from the tokens. This is useful if the text was not set in the protocol buffer, and therefore
   * needs to be reconstructed from tokens.
   *
   * <p>Each token contributes its original text, falling back to its word when the original text
   * is null. Every token after the first is preceded by its "before" string (when non-null),
   * padded with spaces if that string is shorter than the character gap to the previous token.</p>
   *
   * @param tokens The list of tokens representing this sentence.
   * @param sentence The serialized sentence; not consulted by this implementation.
   * @return The original text of the sentence.
   */
  protected String recoverOriginalText(List<CoreLabel> tokens, CoreNLPProtos.Sentence sentence) {
    StringBuilder recovered = new StringBuilder();
    CoreLabel previous = null;
    for (CoreLabel current : tokens) {
      // Inter-token whitespace only applies from the second token onwards
      if (previous != null) {
        String before = current.before();
        if (before != null) {
          recovered.append(before);
          // Pad with spaces when the recorded whitespace is shorter than the character offset gap
          int gap = (current.beginPosition() - previous.endPosition()) - before.length();
          for (int pad = 0; pad < gap; pad++) {
            recovered.append(' ');
          }
        }
      }
      // Prefer the token's original text; fall back to its word
      String original = current.originalText();
      recovered.append(original != null ? original : current.word());
      previous = current;
    }
    return recovered.toString();
  }
}