/**
* Copyright 2007-2014
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package de.tudarmstadt.ukp.dkpro.core.stanfordnlp;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.util.JCasUtil.select;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner;
/**
 * Tests for {@link StanfordLemmatizer}. Every case sends a pre-tokenized text through a
 * pipeline made of {@link StanfordPosTagger} followed by {@link StanfordLemmatizer} and compares
 * the resulting {@link Lemma} annotations with a list of expected lemma strings.
 */
public class StanfordLemmatizerTest
{
    /**
     * JUnit rule attached to every test method of this class; its behaviour is defined by
     * {@link DkproTestContext}.
     */
    @Rule
    public DkproTestContext testContext = new DkproTestContext();

    /**
     * A stand-alone underscore token is expected to come back as the lemma {@code "_"}.
     */
    @Test
    public void testUnderscore() throws Exception
    {
        String text = "foo _ bar";
        String[] expected = { "foo", "_", "bar" };

        runTest("en", text, expected);
    }

    /**
     * Checks the lemmas of two English sentences: a short one and a longer one containing
     * inflected verbs and plural nouns.
     */
    @Test
    public void testEnglish() throws Exception
    {
        String shortSentence = "This is a test .";
        String[] shortLemmas = { "this", "be", "a", "test", "." };
        runTest("en", shortSentence, shortLemmas);

        String longSentence = "We need a very complicated example sentence , which "
                + "contains as many constituents and dependencies as possible .";
        String[] longLemmas = {
                "we", "need", "a", "very", "complicated", "example",
                "sentence", ",", "which", "contain", "as", "many", "constituent", "and",
                "dependency", "as", "possible", "." };
        runTest("en", longSentence, longLemmas);
    }

    /**
     * Running the pipeline on a document whose language is {@code "de"} is expected to fail
     * with an {@link AnalysisEngineProcessException}; the empty lemma array is only a
     * placeholder.
     */
    @Test(expected = AnalysisEngineProcessException.class)
    public void testNotEnglish() throws Exception
    {
        String[] noLemmas = new String[0];

        runTest("de", "Das ist ein test .", noLemmas);
    }

    /**
     * A URL token is expected to be kept as a single lemma whose text is lower-cased
     * ({@code army_M4_...} becomes {@code army_m4_...}); the surrounding tokens are German
     * words processed with the language set to {@code "en"}.
     */
    @Test
    public void testUrl() throws Exception
    {
        String text =
                "Details hinzu findet man unter http://www.armytimes.com/news/2009/11/army_M4_112109w/ .";
        String[] expected = {
                "detail", "hinzu", "findet", "man", "unter",
                "http://www.armytimes.com/news/2009/11/army_m4_112109w/", "." };

        runTest("en", text, expected);
    }

    /**
     * Builds the tagger + lemmatizer pipeline, runs it on the given text via
     * {@link TestRunner#runTest} and asserts the produced lemmas.
     *
     * @param aLanguage
     *            language code handed to the test runner for the document.
     * @param testDocument
     *            the text to process.
     * @param lemmas
     *            the expected lemma values, compared against the {@link Lemma} annotations
     *            selected from the resulting CAS.
     * @throws Exception
     *             if building or running the pipeline fails.
     */
    private void runTest(String aLanguage, String testDocument, String[] lemmas)
        throws Exception
    {
        // Combine both components into one description: POS tagger first, lemmatizer second.
        AnalysisEngineDescription pipeline = createEngineDescription(
                createEngineDescription(StanfordPosTagger.class),
                createEngineDescription(StanfordLemmatizer.class));

        JCas processed = TestRunner.runTest(pipeline, aLanguage, testDocument);

        AssertAnnotations.assertLemma(lemmas, select(processed, Lemma.class));
    }
}