NERFeatureFactoryITest.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.ie;


import edu.stanford.nlp.ling.SentenceUtils;
import junit.framework.TestCase;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.util.PaddedList;

import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;


/**
 * Tests that various features options produce the expected sets of strings.
 * Actually, right now it only tests the sloppy gazette.
 *
 * @author John Bauer
 */
public class NERFeatureFactoryITest extends TestCase {

  private static void checkFeatures(Set<String> features, String ... expected) {
    assertEquals(expected.length, features.size());
    for (String feature : expected) {
      assertTrue(features.contains(feature));
    }
  }

  public void testSloppyGazette() {
    List<CoreLabel> sentence = SentenceUtils.toCoreLabelList("For three years , John Bauer has worked at Stanford .".split(" +"));
    PaddedList<CoreLabel> paddedSentence = new PaddedList<CoreLabel>(sentence, new CoreLabel());

    Properties props = new Properties();
    props.setProperty("useGazettes", "true");
    props.setProperty("sloppyGazette", "true");
    props.setProperty("gazette", "projects/core/data/edu/stanford/nlp/ie/test_gazette.txt");
    SeqClassifierFlags flags = new SeqClassifierFlags(props);
    NERFeatureFactory<CoreLabel> factory = new NERFeatureFactory<CoreLabel>();
    factory.init(flags);

    Set<String> features;
    features = new HashSet<String>(factory.featuresC(paddedSentence, 4));
    checkFeatures(features, "BAR-GAZ", "BAZ-GAZ", "FOO-GAZ", "BAR-GAZ2", "BAZ-GAZ2", "FOO-GAZ1", "John-WORD");
    features = new HashSet<String>(factory.featuresC(paddedSentence, 5));
    checkFeatures(features, "BAR-GAZ", "BAZ-GAZ", "BAR-GAZ2", "BAZ-GAZ2", "Bauer-WORD");
    features = new HashSet<String>(factory.featuresC(paddedSentence, 6));
    checkFeatures(features, "has-WORD");
  }

}