MakePrefixFile.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.tagger.util; 
import edu.stanford.nlp.util.logging.Redwood;

import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import edu.stanford.nlp.tagger.io.TaggedFileRecord;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;

import java.util.List;
import java.util.Properties;
import java.util.Random;

/**
 * Takes a tagger data file of any format readable by the tagger and
 * outputs a new file containing tagged sentences which are prefixes
 * of the original data.  The prefixes are of random length, from one
 * word up to the entire sentence.  If the -fullSentence parameter is
 * true, the original sentence is output after each prefix.
 * Sentences containing no words are skipped, since they have no
 * non-empty prefix.
 * <br>
 * Input is taken from the tagger file described in "input".  Output
 * goes to stdout.
 * <br>
 * Recognized properties:
 * <ul>
 *   <li>{@code input} (required): description of the tagged file to read</li>
 *   <li>{@code fullSentence} (default false): also print the whole sentence</li>
 *   <li>{@code tagSeparator} (default {@code TaggerConfig.TAG_SEPARATOR}):
 *       separator passed to the sentence formatter</li>
 * </ul>
 *
 * @author John Bauer
 */
public class MakePrefixFile  {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(MakePrefixFile.class);

  /**
   * Reads the tagged file named by the {@code input} property and prints
   * one random-length prefix per sentence to stdout.
   *
   * @param args command line arguments, converted to properties
   * @throws IllegalArgumentException if the {@code input} property is missing
   */
  public static void main(String[] args) {
    Properties config = StringUtils.argsToProperties(args);
    log.info(config);

    // getProperty returns null when the key is absent; fail fast with a
    // clear message instead of handing null to TaggedFileRecord.
    String input = config.getProperty("input");
    if (input == null) {
      throw new IllegalArgumentException("Missing required property \"input\": "
                                         + "usage: MakePrefixFile -input <file> "
                                         + "[-fullSentence true|false] [-tagSeparator <sep>]");
    }

    boolean fullSentence = PropertiesUtils.getBool(config, "fullSentence", false);

    Random random = new Random();
    String tagSeparator = config.getProperty("tagSeparator", TaggerConfig.TAG_SEPARATOR);

    TaggedFileRecord record = TaggedFileRecord.createRecord(config, input);
    for (List<TaggedWord> sentence : record.reader()) {
      // Random.nextInt(bound) requires bound > 0, so an empty sentence
      // would throw; there is no non-empty prefix to emit for it anyway.
      if (sentence.isEmpty()) {
        continue;
      }
      // nextInt(size) is in [0, size), so len is in [1, size].
      int len = random.nextInt(sentence.size()) + 1;
      System.out.println(SentenceUtils.listToString(sentence.subList(0, len), false, tagSeparator));
      if (fullSentence) {
        System.out.println(SentenceUtils.listToString(sentence, false, tagSeparator));
      }
    }
  }

}