package edu.stanford.nlp.pipeline; import junit.framework.TestCase; import java.util.ArrayList; import java.util.List; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.util.ArrayCoreMap; import edu.stanford.nlp.util.CoreMap; /** * Tests a short pipeline starting from raw text and finishing with * the MorphaAnnotator. Checks the output to make sure it is as * expected. * * @author Heeyoung Lee * @author John Bauer */ public class MorphaAnnotatorITest extends TestCase { private static AnnotationPipeline fullPipeline; private static AnnotationPipeline shortPipeline; @Override public void setUp() throws Exception { synchronized(MorphaAnnotatorITest.class) { if (fullPipeline == null) { fullPipeline = new AnnotationPipeline(); fullPipeline.addAnnotator(new TokenizerAnnotator(false, "en")); fullPipeline.addAnnotator(new WordsToSentencesAnnotator(false)); fullPipeline.addAnnotator(new POSTaggerAnnotator(false)); fullPipeline.addAnnotator(new MorphaAnnotator(false)); } if (shortPipeline == null) { shortPipeline = new AnnotationPipeline(); shortPipeline.addAnnotator(new MorphaAnnotator(false)); } } } private static void checkResult(List<CoreLabel> words) { assertEquals(words.size(), answer.length); for (int i = 0 ; i < answer.length ; i++){ CoreLabel word = words.get(i); String lemma = word.get(CoreAnnotations.LemmaAnnotation.class); assertEquals(lemma, answer[i]); } } public void testMorphaAnnotator() throws Exception { Annotation document = new Annotation(text); fullPipeline.annotate(document); checkResult(document.get(CoreAnnotations.TokensAnnotation.class)); } private static List<CoreLabel> getTestWords() { List<CoreLabel> words = new ArrayList<CoreLabel>(); if (tokenizedText.length != tokenizedTags.length) { throw new AssertionError("tokenizedText and tokenizedTags " + "must be of the same length"); } for (int i = 0; i < tokenizedText.length; ++i) { CoreLabel word = new CoreLabel(); word.setWord(tokenizedText[i]); word.set(CoreAnnotations.TextAnnotation.class, tokenizedText[i]); word.setTag(tokenizedTags[i]); words.add(word); } return words; } public void testSentencesAnnotation() throws Exception { List<CoreLabel> words = getTestWords(); CoreMap sentence = new ArrayCoreMap(); sentence.set(CoreAnnotations.TokensAnnotation.class, words); List<CoreMap> sentences = new ArrayList<CoreMap>(); sentences.add(sentence); Annotation document = new Annotation(text); document.set(CoreAnnotations.SentencesAnnotation.class, sentences); shortPipeline.annotate(document); checkResult(words); } private static final String text = "I saw him ordering them to saw. Jack 's father has n't played\ngolf since 20 years ago . I 'm going to the\nbookstore to return a book Jack and his friends bought me ."; private static final String[] answer = {"I", "see", "he", "order", "they", "to", "saw", ".", "Jack", "'s", "father", "have", "not", "play", "golf", "since", "20", "year", "ago", ".", "I", "be", "go", "to", "the", "bookstore", "to", "return", "a", "book", "Jack", "and", "he", "friend", "buy", "I", "."}; private static final String[] tokenizedText = {"I", "saw", "him", "ordering", "them", "to", "saw", ".", "Jack", "'s", "father", "has", "n't", "played", "golf", "since", "20", "years", "ago", ".", "I", "'m", "going", "to", "the", "bookstore", "to", "return", "a", "book", "Jack", "and", "his", "friends", "bought", "me", "."}; private static final String[] tokenizedTags = {"PRP", "VBD", "PRP", "VBG", "PRP", "TO", "NN", ".", "NNP", "POS", "NN", "VBZ", "RB", "VBN", "NN", "IN", "CD", "NNS", "RB", ".", "PRP", "VBP", "VBG", "TO", "DT", "NN", "TO", "VB", "DT", "NN", "NNP", "CC", "PRP$", "NNS", "VBD", "PRP", "."}; }