package edu.stanford.nlp.pipeline;
import java.util.List;
import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import junit.framework.TestCase;
import org.junit.Assert;
/**
 * Integration test for the {@code truecase} annotator. Runs a
 * {@code tokenize, ssplit, truecase} pipeline over upper-case, lower-case and
 * mixed-case variants of the same sentence and checks the
 * {@link CoreAnnotations.TrueCaseTextAnnotation} of every token.
 *
 * @author Christopher Manning
 */
public class TrueCaseAnnotatorITest extends TestCase {

  /** When true, each token whose true-cased text differs from its word is reported on stderr. */
  private static final boolean VERBOSE = false;

  /** Number of neighbouring tokens printed on each side of a token reported in verbose mode. */
  private static final int CONTEXT = 2;

  /**
   * Annotates {@code sentence} with {@code pipeline} and asserts that the result
   * holds exactly {@code ans.length} tokens in a single sentence, and that the
   * true-cased text of the i-th token equals {@code ans[i]}.
   *
   * @param pipeline pipeline used to annotate the text
   * @param sentence raw text to annotate
   * @param ans expected true-cased text, one entry per token
   */
  private static void runSentence(StanfordCoreNLP pipeline, String sentence, String[] ans) {
    Annotation document = new Annotation(sentence);
    pipeline.annotate(document);

    // check that tokens are present
    List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
    assertNotNull(tokens);
    assertEquals("Wrong number of tokens: " + tokens + " vs. " + ans.length, ans.length, tokens.size());

    // check that sentences are present
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    assertNotNull(sentences);
    assertEquals("Wrong number of sentences", 1, sentences.size());

    // Exactly one sentence was asserted above, so the index of a token within
    // its sentence can be used directly as the index into ans.
    for (CoreMap sent : sentences) {
      List<? extends CoreLabel> words = sent.get(CoreAnnotations.TokensAnnotation.class);
      for (int i = 0; i < words.size(); i++) {
        String tcw = words.get(i).get(CoreAnnotations.TrueCaseTextAnnotation.class);
        if (VERBOSE) {
          reportCaseChange(words, i, tcw);
        }
        assertEquals("Error in truecasing", ans[i], tcw);
      }
    }
  }

  /**
   * Prints a one-line report to stderr when the word at {@code index} differs
   * from {@code trueCased}; prints nothing when they are equal. The report shows
   * the word, its true-cased text and up to {@link #CONTEXT} tokens on each side.
   */
  private static void reportCaseChange(List<? extends CoreLabel> words, int index, String trueCased) {
    String word = words.get(index).word();
    if (word.equals(trueCased)) {
      return;
    }
    System.err.print("\"" + word + "\" true cased to \"" + trueCased + "\" in context:");
    int from = Math.max(0, index - CONTEXT);
    // + 1 because the upper bound is exclusive: the window covers CONTEXT tokens after index.
    int to = Math.min(words.size(), index + CONTEXT + 1);
    for (int j = from; j < to; j++) {
      System.err.print(" " + words.get(j).word());
    }
    System.err.println();
  }

  /**
   * Checks that the upper-case, lower-case and mixed-case variants of each
   * sentence all true-case to the same expected tokens.
   */
  public void testTrueCaseAnnotator() {
    String[] universityVariants = {
        "HEATHER BROWN WAS LEAD WOMAN AT DUKE UNIVERSITY.",
        "heather brown was lead woman at duke university.",
        "Heather Brown was lead woman at Duke University.",
    };
    String[] ans1 = { "Heather", "Brown", "was", "lead", "woman", "at", "Duke", "University", "." };

    String[] greetingVariants = {
        "\"GOOD MORNING AMERICA FROM MCVEY!\"",
        "\"good morning america from mcvey!\"",
        "\"Good Morning America From McVey!\"",
    };
    String[] ans4 = { "``", "Good", "Morning", "America", "from", "McVey", "!", "''" };

    // run each annotation through one shared pipeline
    Properties props = PropertiesUtils.asProperties("annotators", "tokenize, ssplit, truecase");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    for (String text : universityVariants) {
      runSentence(pipeline, text, ans1);
    }
    for (String text : greetingVariants) {
      runSentence(pipeline, text, ans4);
    }
  }
}