/* * Copyright 2012 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.opennlp; import static de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations.assertPOS; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.pipeline.SimplePipeline.runPipeline; import static org.apache.uima.fit.util.JCasUtil.select; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.fit.factory.JCasFactory; import org.apache.uima.jcas.JCas; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceObjectProviderBase; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations; import de.tudarmstadt.ukp.dkpro.core.testing.AssumeResource; import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner; public class OpenNlpPosTaggerTest { @Test public void simpleExample() throws Exception { // NOTE: This file contains Asciidoc markers for partial inclusion of this file in the // documentation. Do not remove these tags! // tag::example[] JCas jcas = JCasFactory.createText("This is a test", "en"); runPipeline(jcas, createEngineDescription(OpenNlpSegmenter.class), createEngineDescription(OpenNlpPosTagger.class)); for (Token t : select(jcas, Token.class)) { System.out.printf("%s %s%n", t.getCoveredText(), t.getPos().getPosValue()); } // end::example[] assertPOS( new String[] { "DET", "VERB", "DET", "NOUN" }, new String[] { "DT", "VBZ", "DT", "NN" }, select(jcas, POS.class)); } @Test public void testEnglishAutoLoad() throws Exception { String oldModelCache = System.setProperty(ResourceObjectProviderBase.PROP_REPO_CACHE, "target/test-output/models"); String oldOfflineMode = System.setProperty(ResourceObjectProviderBase.PROP_REPO_OFFLINE, ResourceObjectProviderBase.FORCE_AUTO_LOAD); try { TestRunner.autoloadModelsOnNextTestRun(); runTest("en", null, "This is a test .", new String[] { "DT", "VBZ", "DT", "NN", "." }, new String[] { "DET", "VERB", "DET", "NOUN", "PUNCT" }); } finally { if (oldModelCache != null) { System.setProperty(ResourceObjectProviderBase.PROP_REPO_CACHE, oldModelCache); } else { System.getProperties().remove(ResourceObjectProviderBase.PROP_REPO_CACHE); } if (oldOfflineMode != null) { System.setProperty(ResourceObjectProviderBase.PROP_REPO_OFFLINE, oldOfflineMode); } else { System.getProperties().remove(ResourceObjectProviderBase.PROP_REPO_OFFLINE); } } } @Test public void testEnglish() throws Exception { runTest("en", null, "This is a test .", new String[] { "DT", "VBZ", "DT", "NN", "." }, new String[] { "DET", "VERB", "DET", "NOUN", "PUNCT" }); runTest("en", null, "A neural net .", new String[] { "DT", "JJ", "NN", "." }, new String[] { "DET", "ADJ", "NOUN", "PUNCT" }); runTest("en", null, "John is purchasing oranges .", new String[] { "NNP", "VBZ", "VBG", "NNS", "." }, new String[] { "PROPN", "VERB", "VERB", "NOUN", "PUNCT" }); // This is WRONG tagging. "jumps" is tagged as "NNS" runTest("en", "maxent", "The quick brown fox jumps over the lazy dog . \n", new String[] { "DT", "JJ", "JJ", "NN", "NNS", "IN", "DT", "JJ", "NN", "." }, new String[] { "DET", "ADJ", "ADJ", "NOUN", "NOUN", "ADP", "DET", "ADJ", "NOUN", "PUNCT" }); } @Test public void testEnglishExtra() throws Exception { runTest("en", "perceptron", "The quick brown fox jumps over the lazy dog . \n", new String[] { "DT", "JJ", "JJ", "NN", "NNS", "IN", "DT", "JJ", "NN", "." }, new String[] { "DET", "ADJ", "ADJ", "NOUN", "NOUN", "ADP", "DET", "ADJ", "NOUN", "PUNCT" }); runTest("en", "perceptron-ixa", "The quick brown fox jumps over the lazy dog . \n", new String[] { "DT", "JJ", "JJ", "NN", "NNS", "IN", "DT", "JJ", "NN", "." }, new String[] { "DET", "ADJ", "ADJ", "NOUN", "NOUN", "ADP", "DET", "ADJ", "NOUN", "PUNCT" }); } @Test public void testGerman() throws Exception { runTest("de", null, "Das ist ein Test .", new String[] { "PDS", "VAFIN", "ART", "NN", "$." }, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); runTest("de", "maxent", "Das ist ein Test .", new String[] { "PDS", "VAFIN", "ART", "NN", "$." }, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); runTest("de", "perceptron", "Das ist ein Test .", new String[] { "PDS", "VAFIN", "ART", "NN", "$." }, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); } @Test public void testItalian() throws Exception { runTest("it", null, "Questo è un test .", new String[] { "PD", "Vip3", "RI", "Sn", "FS" }, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); runTest("it", "perceptron", "Questo è un test .", new String[] { "PD", "Vip3", "RI", "Sn", "FS" }, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); } @Ignore("We don't have these models integrated yet") @Test public void testPortuguese() throws Exception { String[] bosqueTags = new String[] { "?", "adj", "adv", "art", "conj-c", "conj-s", "ec", "in", "n", "num", "pp", "pron-det", "pron-indp", "pron-pers", "prop", "prp", "punc", "v-fin", "v-ger", "v-inf", "v-pcp", "vp" }; JCas jcas = runTest("pt", null, "Este é um teste .", new String[] { "pron-det", "v-fin", "art", "n", "punc" }, new String[] { "PRON", "V", "ART", "NN", "PUNC" }); AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); jcas = runTest("pt", "maxent", "Este é um teste .", new String[] { "pron-det", "v-fin", "art", "n", "punc" }, new String[] { "PRON", "V", "ART", "NN", "PUNC" }); AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); jcas = runTest("pt", "perceptron", "Este é um teste .", new String[] { "pron-det", "v-fin", "art", "n", "punc" }, new String[] { "PRON", "V", "ART", "NN", "PUNC" }); AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); jcas = runTest("pt", "mm-maxent", "Este é um teste .", new String[] { "PROSUB", "V", "ART", "N", "." }, new String[] { "POS", "POS", "POS", "POS", "POS" }); // AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); jcas = runTest("pt", "mm-perceptron", "Este é um teste .", new String[] { "PROSUB", "V", "ART", "N", "." }, new String[] { "POS", "POS", "POS", "POS", "POS" }); // AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); jcas = runTest("pt", "cogroo", "Este é um teste .", new String[] { "pron-det", "v-fin", "artm", "nm", "." }, new String[] { "POS", "POS", "POS", "POS", "POS" }); AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); } @Test public void testSpanish() throws Exception { runTest("es", "maxent", "Esta es una prueba .", new String[] { "PD", "VSI", "DI", "NC", "Fp" }, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); runTest("es", "maxent-ixa", "Esta es una prueba .", new String[] { "PD0FS000", "VSIP3S0", "DI0FS0", "NCFS000", "Fp"}, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); runTest("es", "perceptron-ixa", "Esta es una prueba .", new String[] { "PD0FS000", "VSIP3S0", "DI0FS0", "NCFS000", "Fp"}, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); } @Test public void testSwedish() throws Exception { runTest("sv", "maxent", "Detta är ett test .", new String[] { "PO", "AV", "EN", "NN", "IP" }, new String[] { "POS", "POS", "POS", "POS", "POS" }); } private JCas runTest(String language, String variant, String testDocument, String[] tags, String[] tagClasses) throws Exception { AssumeResource.assumeResource(OpenNlpPosTagger.class, "tagger", language, variant); AnalysisEngine engine = createEngine(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_VARIANT, variant, OpenNlpPosTagger.PARAM_PRINT_TAGSET, true); JCas jcas = TestRunner.runTest(engine, language, testDocument); AssertAnnotations.assertPOS(tagClasses, tags, select(jcas, POS.class)); return jcas; } @Rule public DkproTestContext testContext = new DkproTestContext(); }