/** * Copyright 2007-2014 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package de.tudarmstadt.ukp.dkpro.core.matetools; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.util.JCasUtil.select; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.jcas.JCas; import org.junit.Assume; import org.junit.Rule; import org.junit.Test; import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures; import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations; import de.tudarmstadt.ukp.dkpro.core.testing.AssumeResource; import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner; public class MateMorphTaggerTest { @Test public void testGerman() throws Exception { Assume.assumeTrue(Runtime.getRuntime().maxMemory() >= 1000000000); JCas jcas = runTest("de", "Wir brauchen ein sehr kompliziertes Beispiel , welches " + "möglichst viele Konstituenten und Dependenzen beinhaltet ."); String[] morphTagsExpected = { "[ 0, 3] - - - - - - - - - - - - - - - - - Wir (case=nom|number=pl|gender=*|person=1)", "[ 4, 12] - - - - - - - - - - - - - - - - - brauchen (number=pl|person=1|tense=pres|mood=ind)", "[ 13, 16] - - - - - - - - - - - - - - - - - ein (case=acc|number=sg|gender=neut)", "[ 17, 21] - - - - - - - - - - - - - - - - - sehr (_)", "[ 22, 35] - - - - - - - - - - - - - - - - - kompliziertes (case=acc|number=sg|gender=neut|degree=pos)", "[ 36, 44] - - - - - - - - - - - - - - - - - Beispiel (case=acc|number=sg|gender=neut)", "[ 45, 46] - - - - - - - - - - - - - - - - - , (_)", "[ 47, 54] - - - - - - - - - - - - - - - - - welches (case=acc|number=sg|gender=neut)", "[ 55, 64] - - - - - - - - - - - - - - - - - möglichst (_)", "[ 65, 70] - - - - - - - - - - - - - - - - - viele (case=acc|number=pl|gender=*)", "[ 71, 84] - - - - - - - - - - - - - - - - - Konstituenten (case=acc|number=pl|gender=*)", "[ 85, 88] - - - - - - - - - - - - - - - - - und (_)", "[ 89,100] - - - - - - - - - - - - - - - - - Dependenzen (case=acc|number=pl|gender=fem)", "[101,111] - - - - - - - - - - - - - - - - - beinhaltet (number=sg|person=3|tense=pres|mood=ind)", "[112,113] - - - - - - - - - - - - - - - - - . (_)" }; AssertAnnotations.assertMorph(morphTagsExpected, select(jcas, MorphologicalFeatures.class)); } @Test public void testFrench() throws Exception { Assume.assumeTrue(Runtime.getRuntime().maxMemory() >= 1000000000); JCas jcas = runTest("fr", "Nous avons besoin d'une phrase par exemple très " + "compliqué, qui contient des constituants que de nombreuses dépendances et que " + "possible ."); String[] morphTagsExpected = { "[ 0, 4] - - - - - - - - - - - - - - - - - Nous (g=m|n=p|p=1|s=suj)", "[ 5, 10] - - - - - - - - - - - - - - - - - avons (m=ind|n=p|p=1|t=pst)", "[ 11, 17] - - - - - - - - - - - - - - - - - besoin (g=m|n=s|s=c)", "[ 18, 23] - - - - - - - - - - - - - - - - - d'une (_)", "[ 24, 30] - - - - - - - - - - - - - - - - - phrase (g=f|n=s|s=c)", "[ 31, 34] - - - - - - - - - - - - - - - - - par (_)", "[ 35, 42] - - - - - - - - - - - - - - - - - exemple (g=m|n=s|s=c)", "[ 43, 47] - - - - - - - - - - - - - - - - - très (_)", "[ 48, 58] - - - - - - - - - - - - - - - - - compliqué, (g=m|n=s|s=qual)", "[ 59, 62] - - - - - - - - - - - - - - - - - qui (g=m|n=p|p=3|s=rel)", "[ 63, 71] - - - - - - - - - - - - - - - - - contient (m=ind|n=s|p=3|t=pst)", "[ 72, 75] - - - - - - - - - - - - - - - - - des (g=m|n=p|s=ind)", "[ 76, 88] - - - - - - - - - - - - - - - - - constituants (g=m|n=p|s=c)", "[ 89, 92] - - - - - - - - - - - - - - - - - que (g=m|n=p|p=3|s=rel)", "[ 93, 95] - - - - - - - - - - - - - - - - - de (g=f|n=p|s=ind)", "[ 96,106] - - - - - - - - - - - - - - - - - nombreuses (g=f|n=p|s=qual)", "[107,118] - - - - - - - - - - - - - - - - - dépendances (g=f|n=p|s=c)", "[119,121] - - - - - - - - - - - - - - - - - et (s=c)", "[122,125] - - - - - - - - - - - - - - - - - que (s=s)", "[126,134] - - - - - - - - - - - - - - - - - possible (g=m|n=s|s=qual)", "[135,136] - - - - - - - - - - - - - - - - - . (s=s)" }; AssertAnnotations.assertMorph(morphTagsExpected, select(jcas, MorphologicalFeatures.class)); } @Test public void testSpanish() throws Exception { Assume.assumeTrue(Runtime.getRuntime().maxMemory() >= 1000000000); JCas jcas = runTest("es", "Necesitamos una oración de ejemplo muy complicado , que " + "contiene la mayor cantidad de componentes y dependencias como sea posible ."); String[] morphTagsExpected = { "[ 0, 11] - - - - - - - - - - - - - - - - - Necesitamos (postype=main|gen=c|num=p|person=1|mood=indicative|tense=present)", "[ 12, 15] - - - - - - - - - - - - - - - - - una (postype=indefinite|gen=f|num=s)", "[ 16, 23] - - - - - - - - - - - - - - - - - oración (postype=common|gen=f|num=s)", "[ 24, 26] - - - - - - - - - - - - - - - - - de (postype=preposition|gen=c|num=c)", "[ 27, 34] - - - - - - - - - - - - - - - - - ejemplo (postype=common|gen=m|num=s)", "[ 35, 38] - - - - - - - - - - - - - - - - - muy (_)", "[ 39, 49] - - - - - - - - - - - - - - - - - complicado (postype=qualificative|gen=m|num=s|posfunction=participle)", "[ 50, 51] - - - - - - - - - - - - - - - - - , (punct=comma)", "[ 52, 55] - - - - - - - - - - - - - - - - - que (postype=relative|gen=c|num=c)", "[ 56, 64] - - - - - - - - - - - - - - - - - contiene (postype=main|gen=c|num=s|person=3|mood=indicative|tense=present)", "[ 65, 67] - - - - - - - - - - - - - - - - - la (postype=article|gen=f|num=s)", "[ 68, 73] - - - - - - - - - - - - - - - - - mayor (postype=qualificative|gen=c|num=s)", "[ 74, 82] - - - - - - - - - - - - - - - - - cantidad (postype=common|gen=f|num=s)", "[ 83, 85] - - - - - - - - - - - - - - - - - de (postype=preposition|gen=c|num=c)", "[ 86, 97] - - - - - - - - - - - - - - - - - componentes (postype=common|gen=m|num=p)", "[ 98, 99] - - - - - - - - - - - - - - - - - y (postype=coordinating)", "[100,112] - - - - - - - - - - - - - - - - - dependencias (postype=common|gen=f|num=p)", "[113,117] - - - - - - - - - - - - - - - - - como (postype=subordinating)", "[118,121] - - - - - - - - - - - - - - - - - sea (postype=semiauxiliary|gen=c|num=s|person=3|mood=subjunctive|tense=present)", "[122,129] - - - - - - - - - - - - - - - - - posible (postype=qualificative|gen=c|num=s)", "[130,131] - - - - - - - - - - - - - - - - - . (punct=period)"}; AssertAnnotations.assertMorph(morphTagsExpected, select(jcas, MorphologicalFeatures.class)); } private JCas runTest(String aLanguage, String aText) throws Exception { Assume.assumeTrue(Runtime.getRuntime().maxMemory() >= 2000000000); AssumeResource.assumeResource(MateMorphTagger.class, "morphtagger", aLanguage, null); AnalysisEngineDescription lemma = createEngineDescription(MateLemmatizer.class); AnalysisEngineDescription morphTag = createEngineDescription(MateMorphTagger.class); AnalysisEngineDescription aggregate = createEngineDescription(lemma, morphTag); return TestRunner.runTest(aggregate, aLanguage, aText); } @Rule public DkproTestContext testContext = new DkproTestContext(); }