/** * Copyright 2007-2014 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package de.tudarmstadt.ukp.dkpro.core.matetools; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.util.JCasUtil.select; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.jcas.JCas; import org.junit.Assume; import org.junit.Rule; import org.junit.Test; import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations; import de.tudarmstadt.ukp.dkpro.core.testing.AssumeResource; import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner; public class MatePosTaggerTest { @Test public void testGerman() throws Exception { JCas jcas = runTest("de", "Wir brauchen ein sehr kompliziertes Beispiel , welches " + "möglichst viele Konstituenten und Dependenzen beinhaltet ."); String[] posOriginal = { "PPER", "VVFIN", "ART", "ADV", "ADJA", "NN", "$,", "PRELS", "ADV", "PIAT", "NN", "KON", "NN", "VVFIN", "$." }; String[] posMapped = { "PRON", "VERB", "DET", "ADV", "ADJ", "NOUN", "PUNCT", "PRON", "ADV", "PRON", "NOUN", "CONJ", "NOUN", "VERB", "PUNCT" }; String[] posTags = { "$(", "$,", "$.", "ADJA", "ADJD", "ADV", "APPO", "APPR", "APPRART", "APZR", "ART", "CARD", "END", "FM", "ITJ", "KOKOM", "KON", "KOUI", "KOUS", "MID", "NE", "NN", "NNE", "PDAT", "PDS", "PIAT", "PIS", "PPER", "PPOSAT", "PPOSS", "PRELAT", "PRELS", "PRF", "PROAV", "PTKA", "PTKANT", "PTKNEG", "PTKVZ", "PTKZU", "PWAT", "PWAV", "PWS", "STPOS", "STR", "TRUNC", "VAFIN", "VAIMP", "VAINF", "VAPP", "VMFIN", "VMINF", "VMPP", "VVFIN", "VVIMP", "VVINF", "VVIZU", "VVPP", "XY" }; AssertAnnotations.assertPOS(posMapped, posOriginal, select(jcas, POS.class)); AssertAnnotations.assertTagset(POS.class, "stts", posTags, jcas); } @Test public void testEnglish() throws Exception { Assume.assumeTrue(Runtime.getRuntime().maxMemory() >= 2000000000); JCas jcas = runTest("en", "We need a very complicated example sentence , which " + "contains as many constituents and dependencies as possible ."); String[] posOriginal = { "PRP", "VBP", "DT", "RB", "JJ", "NN", "NN", ",", "WDT", "VBZ", "IN", "DT", "NNS", "CC", "NNS", "IN", "JJ", "." }; String[] posMapped = { "PRON", "VERB", "DET", "ADV", "ADJ", "NOUN", "NOUN", "PUNCT", "DET", "VERB", "ADP", "DET", "NOUN", "CONJ", "NOUN", "ADP", "ADJ", "PUNCT" }; String[] posTags = { "#", "$", "''", "(", ")", ",", ".", ":", "CC", "CD", "DT", "END", "EX", "FW", "HYPH", "IN", "JJ", "JJR", "JJS", "LS", "MD", "MID", "NIL", "NN", "NNP", "NNPS", "NNS", "PDT", "POS", "PRF", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "STPOS", "STR", "SYM", "TO", "UH", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB", "``" }; AssertAnnotations.assertPOS(posMapped, posOriginal, select(jcas, POS.class)); AssertAnnotations.assertTagset(POS.class, "ptb", posTags, jcas); } @Test public void testFrench() throws Exception { JCas jcas = runTest("fr", "Nous avons besoin d'une phrase par exemple très " + "compliqué, qui contient des constituants que de nombreuses dépendances et que " + "possible ."); String[] posMapped = { "PRON", "VERB", "NOUN", "ADP", "NOUN", "ADP", "NOUN", "ADV", "ADJ", "PRON", "VERB", "DET", "NOUN", "CONJ", "DET", "ADJ", "NOUN", "CONJ", "CONJ", "ADJ", "PUNCT" }; String[] posOriginal = { "CLS", "V", "NC", "P", "NC", "P", "NC", "ADV", "ADJ", "PROREL", "V", "DET", "NC", "CS", "DET", "ADJ", "NC", "CC", "CS", "ADJ", "PONCT" }; String[] posTags = { "ADJ", "ADJWH", "ADV", "ADVWH", "CC", "CLO", "CLR", "CLS", "CS", "DET", "DETWH", "END", "ET", "I", "MID", "NC", "NPP", "P", "P+D", "P+PRO", "PONCT", "PREF", "PRO", "PROREL", "PROWH", "STPOS", "STR", "V", "VIMP", "VINF", "VPP", "VPR", "VS" }; String[] unmappedPos = { "END", "MID", "STPOS", "STR" }; AssertAnnotations.assertPOS(posMapped, posOriginal, select(jcas, POS.class)); AssertAnnotations.assertTagset(POS.class, "melt", posTags, jcas); AssertAnnotations.assertTagsetMapping(POS.class, "melt", unmappedPos, jcas); } private JCas runTest(String aLanguage, String aText) throws Exception { AssumeResource.assumeResource(MatePosTagger.class, "tagger", aLanguage, null); AnalysisEngineDescription posTag = createEngineDescription(MatePosTagger.class); return TestRunner.runTest(posTag, aLanguage, aText); } @Rule public DkproTestContext testContext = new DkproTestContext(); }