CoNLLUOutputter.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.pipeline;

import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.List;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.trees.ud.CoNLLUDocumentWriter;
import edu.stanford.nlp.util.CoreMap;

/**
 * <p>Write a subset of our CoreNLP output in CoNLL-U format.</p>
 *
 * <p>The fields currently output are:</p>
 *
 * <table>
 *   <tr>
 *     <td>Field Number</td>
 *     <td>Field Name</td>
 *     <td>Description</td>
 *   </tr>
 *   <tr>
 *     <td>1</td>
 *     <td>ID</td>
 *     <td>Word index, integer starting at 1 for each new sentence; may be a range for tokens with multiple words.</td>
 *   </tr>
 *   <tr>
 *     <td>2</td>
 *     <td>FORM</td>
 *     <td>Word form or punctuation symbol.</td>
 *   </tr>
 *   <tr>
 *     <td>3</td>
 *     <td>LEMMA</td>
 *     <td>Lemma or stem of word form, or an underscore if not available.</td>
 *   </tr>
 *   <tr>
 *     <td>4</td>
 *     <td>CPOSTAG</td>
 *     <td>Universal part-of-speech tag, or underscore if not available.</td>
 *   </tr>
 *   <tr>
 *     <td>5</td>
 *     <td>POSTAG</td>
 *     <td>Language-specific part-of-speech tag, or underscore if not available.</td>
 *   </tr>
 *   <tr>
 *     <td>6</td>
 *     <td>FEATS</td>
 *     <td>List of morphological features from the universal feature inventory or from a defined language-specific extension; underscore if not available.</td>
 *   </tr>
 *   <tr>
 *     <td>7</td>
 *     <td>HEAD</td>
 *     <td>Head of the current token, which is either a value of ID or zero ('0').
 *         This is underscore if not available.</td>
 *   </tr>
 *   <tr>
 *     <td>8</td>
 *     <td>DEPREL</td>
 *     <td>Dependency relation to the HEAD, or underscore if not available.</td>
 *   </tr>
 *   <tr>
 *     <td>9</td>
 *     <td>DEPS</td>
 *     <td>List of secondary dependencies</td>
 *   </tr>
 *   <tr>
 *     <td>10</td>
 *     <td>MISC</td>
 *     <td>Any other annotation</td>
 *   </tr>
 * </table>
 *
 * @author Sebastian Schuster
 * @author Gabor Angeli
 *
 */
public class CoNLLUOutputter extends AnnotationOutputter {

  private static final CoNLLUDocumentWriter conllUWriter = new CoNLLUDocumentWriter();

  public CoNLLUOutputter() {}

  @Override
  public void print(Annotation doc, OutputStream target, Options options) throws IOException {
    PrintWriter writer = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));

    List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
      SemanticGraph sg = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
      if (sg != null) {
        writer.print(conllUWriter.printSemanticGraph(sg));
      }
    }
    writer.flush();
  }


  public static void conllUPrint(Annotation annotation, OutputStream os) throws IOException {
    new CoNLLUOutputter().print(annotation, os);
  }

  public static void conllUPrint(Annotation annotation, OutputStream os, StanfordCoreNLP pipeline) throws IOException {
    new CoNLLUOutputter().print(annotation, os, pipeline);
  }

  public static void conllUPrint(Annotation annotation, OutputStream os, Options options) throws IOException {
    new CoNLLUOutputter().print(annotation, os, options);
  }

}