/*
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.ixa;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.util.JCasUtil.select;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.jcas.JCas;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations;
import de.tudarmstadt.ukp.dkpro.core.testing.AssumeResource;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner;
public class IxaLemmatizerTest
{
@Test
public void testBasque()
throws Exception
{
JCas jcas = runTest("eu", null, "Oso konplikatua esaldi adibidea da , eta horrek ahalik "
+ "eta osagai eta mendekotasunen asko dauka behar dugu .");
String[] lemmas = { "oso", "konplikatu", "esaldi", "adibide", "izan", ",", "eta", "hori",
"ahal", "eta", "osagai", "eta", "mendekotasun", "asko", "eduki", "behar", "ukan",
"." };
AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
}
@Test
public void testDutch()
throws Exception
{
JCas jcas = runTest("nl", null, "We hebben een zeer ingewikkeld voorbeeld zin , die zoveel "
+ "mogelijk bestanddelen en afhankelijkheden bevat mogelijk .");
String[] lemmas = { "we", "heb", "een", "zeer", "ingewikkeld", "voorbeeld", "zin", ",",
"die", "zoveel", "mogelijk", "bestanddelen", "en", "afhankelijked_af", "bevat",
"mogelijk", "." };
AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
}
@Test
public void testEnglish()
throws Exception
{
JCas jcas = runTest("en", null, "We need a very complicated example sentence , which "
+ "contains as many constituents and dependencies as possible .");
String[] lemmas = { "we", "need", "a", "very", "complicate", "example", "sentence", ",",
"which", "contain", "as", "many", "constituent", "and", "dependency", "as",
"possible", "." };
AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
}
@Test
public void testEnglishExtra()
throws Exception
{
{
JCas jcas = runTest("en", "perceptron-ud", "We need a very complicated example "
+ "sentence , which contains as many constituents and dependencies as "
+ "possible .");
String[] lemmas = { "we", "need", "a", "very", "complicated", "example", "sentence",
",", "which", "contains", "as", "many", "constituents", "and", "dependency",
"as", "possible", "." };
AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
}
{
JCas jcas = runTest("en", "xlemma-perceptron-ud", "We need a very complicated example "
+ "sentence , which contains as many constituents and dependencies as "
+ "possible.");
String[] lemmas = { "we", "need", "a", "very", "complicate", "example", "sentence", ",",
"which", "contain", "as", "many", "constituent", "and", "dependency", "as",
"possible." };
AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
}
}
@Test
public void testGerman()
throws Exception
{
JCas jcas = runTest("de", null, "Wir brauchen ein sehr kompliziertes Beispiel , welches "
+ "möglichst viele Konstituenten und Dependenzen beinhaltet .");
String[] lemmas = { "wir", "brauchen", "ein", "sehr", "kompliziert", "beispiel", "_",
"welcher", "möglichst", "vieler", "konstituent", "und", "dependenz", "beinhalten",
"_" };
AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
}
@Test
public void testItalian()
throws Exception
{
JCas jcas = runTest("it", null, "Abbiamo bisogno di un esempio molto complicata frase , "
+ "che contiene tante componenti e le dipendenze possibile .");
String[] lemmas = { "avere", "bisogno", "di", "uno", "esempio", "molto", "complicato",
"frase", ",", "che", "contenere", "tanto", "componente", "e", "il", "dipendenza",
"possibile", "." };
AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
}
@Test
public void testFrench()
throws Exception
{
JCas jcas = runTest("es", null, "Nous avons besoin d' une phrase par exemple très "
+ "compliqué , qui contient des constituants que de nombreuses dépendances et que "
+ "possible .");
String[] lemmas = { "nous", "avon", "besoir", "d'", "unir", "phrar", "par", "exemple",
"trèr", "compliqué", ",", "qui", "contientr", "d", "constituant", "que", "de",
"nombreuse", "dépendanz", "et", "que", "possible", "." };
AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
}
@Test
public void testGalician()
throws Exception
{
JCas jcas = runTest("gl", null, "Necesitamos unha frase de exemplo moi complicado , que "
+ "contén o maior número de compoñentes e dependencias posible .");
String[] lemmas = { "necesitar", "un", "frase", "de", "exemplo", "moi", "complicar", ",",
"que", "conter", "o", "maior", "número", "de", "compoñente", "e", "dependencia",
"posible", "." };
AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
}
@Test
public void testSpanish()
throws Exception
{
JCas jcas = runTest("es", null, "Necesitamos una oración de ejemplo muy complicado , que "
+ "contiene la mayor cantidad de componentes y dependencias como sea posible .");
String[] lemmas = { "necesitr", "uno", "oración", "de", "ejemplo", "mucho", "complicado",
",", "que", "contener", "el", "mayor", "cantidad", "de", "componente", "y",
"dependencia", "como", "ser", "posible", "." };
String[] posTags = { "AO0FP0", "AO0FS0", "AO0MP0", "AO0MS0", "AQ0000", "AQ000P", "AQ0CC0",
"AQ0CP0", "AQ0CS0", "AQ0FP0", "AQ0FPP", "AQ0FS0", "AQ0FSP", "AQ0MP0", "AQ0MPP",
"AQ0MS0", "AQ0MSP", "CC", "CS", "DA0CS0", "DA0FP0", "DA0FS0", "DA0MP0", "DA0MS0",
"DD0CP0", "DD0CS0", "DD0FP0", "DD0FS0", "DD0MP0", "DD0MS0", "DE0CC0", "DI0CP0",
"DI0CS0", "DI0FP0", "DI0FS0", "DI0MP0", "DI0MS0", "DN0CP0", "DN0CS0", "DN0FP0",
"DN0FS0", "DN0MP0", "DN0MS0", "DP1CPS", "DP1CSS", "DP1FPP", "DP1FSP", "DP1MPP",
"DP1MSP", "DP1MSS", "DP2CPS", "DP2CSS", "DP2FSP", "DP3CP0", "DP3CS0", "DP3MP0",
"DT0FS0", "DT0MP0", "Faa", "Fat", "Fc", "Fd", "Fe", "Fg", "Fh", "Fia", "Fit", "Fp",
"Fpa", "Fpt", "Fs", "Fx", "Fz", "I", "NCCC000", "NCCP000", "NCCS000", "NCF0000",
"NCFC000", "NCFP000", "NCFS000", "NCMC000", "NCMP000", "NCMS000", "NP00000",
"NPCC000", "P0000000", "P00CC000", "P01CS000", "P02CS000", "P03CC000", "PD0CP000",
"PD0CS000", "PD0FP000", "PD0FS000", "PD0MP000", "PD0MS000", "PI0CC000", "PI0CP000",
"PI0CS000", "PI0FP000", "PI0FS000", "PI0MP000", "PI0MS000", "PN0CP000", "PN0FP000",
"PN0FS000", "PN0MP000", "PN0MS000", "PP1CP000", "PP1CS000", "PP1CSN00", "PP1CSO00",
"PP1MP000", "PP2CP000", "PP2CP00P", "PP2CS000", "PP2CS00P", "PP2CSN00", "PP2CSO00",
"PP3CC000", "PP3CCA00", "PP3CCO00", "PP3CP000", "PP3CPA00", "PP3CPD00", "PP3CS000",
"PP3CSA00", "PP3CSD00", "PP3FP000", "PP3FPA00", "PP3FS000", "PP3FSA00", "PP3MP000",
"PP3MPA00", "PP3MS000", "PP3MSA00", "PR0CC000", "PR0CP000", "PR0CS000", "PR0FP000",
"PR0FS000", "PR0MP000", "PR0MS000", "PT000000", "PT0CC000", "PT0CP000", "PT0CS000",
"PT0FP000", "PT0MP000", "PX1FP0P0", "PX1FS0P0", "PX1FS0S0", "PX1MP0P0", "PX2FS0S0",
"PX3CS000", "PX3FP000", "PX3FS000", "PX3MP000", "PX3MS000", "RG", "RN", "SPCMS",
"SPS00", "SPSCC", "VAG0000", "VAIC1P0", "VAIC3P0", "VAIC3S0", "VAIF1P0", "VAIF1S0",
"VAIF2S0", "VAIF3P0", "VAIF3S0", "VAII1P0", "VAII1S0", "VAII2S0", "VAII3P0",
"VAII3S0", "VAIP1P0", "VAIP1S0", "VAIP2P0", "VAIP2S0", "VAIP3P0", "VAIP3S0",
"VAIS3P0", "VAIS3S0", "VAN0000", "VAP00SM", "VASI1P0", "VASI1S0", "VASI3P0",
"VASI3S0", "VASP1S0", "VASP3P0", "VMG0000", "VMIC1P0", "VMIC1S0", "VMIC2S0",
"VMIC3P0", "VMIC3S0", "VMIF1P0", "VMIF1S0", "VMIF2S0", "VMIF3P0", "VMIF3S0",
"VMII1P0", "VMII1S0", "VMII2P0", "VMII2S0", "VMII3P0", "VMII3S0", "VMIP1P0",
"VMIP1S0", "VMIP2P0", "VMIP2S0", "VMIP3P0", "VMIP3PC", "VMIP3S0", "VMIP3SC",
"VMIS1P0", "VMIS1S0", "VMIS2S0", "VMIS3P0", "VMIS3S0", "VMIS3SC", "VMM01P0",
"VMM02S0", "VMM03P0", "VMM03S0", "VMN0000", "VMP00PF", "VMP00PM", "VMP00SF",
"VMP00SM", "VMSI1P0", "VMSI1S0", "VMSI3P0", "VMSI3S0", "VMSP1P0", "VMSP1S0",
"VMSP2P0", "VMSP2S0", "VMSP3P0", "VMSP3S0", "VSG0000", "VSIC1S0", "VSIC2S0",
"VSIC3P0", "VSIC3S0", "VSIF1S0", "VSIF3P0", "VSIF3S0", "VSII1P0", "VSII3P0",
"VSII3S0", "VSIP1P0", "VSIP1S0", "VSIP2S0", "VSIP3P0", "VSIP3S0", "VSIS1S0",
"VSIS3P0", "VSIS3S0", "VSM02S0", "VSN0000", "VSP00SM", "VSSF3S0", "VSSI3P0",
"VSSI3S0", "VSSP1S0", "VSSP2S0", "VSSP3P0", "VSSP3S0", "W", "Z", "Zm", "Zp", "_" };
AssertAnnotations.assertLemma(lemmas, select(jcas, Lemma.class));
// AssertAnnotations.assertTagset(IxaPosTagger.class, POS.class, "ancora-ixa", posTags, jcas);
AssertAnnotations.assertTagset(IxaLemmatizer.class, POS.class, "ancora-ixa", posTags, jcas);
}
private JCas runTest(String aLanguage, String aVariant, String aText)
throws Exception
{
AssumeResource.assumeResource(IxaLemmatizer.class, "lemmatizer", aLanguage, aVariant);
AnalysisEngineDescription tagger = createEngineDescription(IxaPosTagger.class);
AnalysisEngineDescription lemmatizer = createEngineDescription(IxaLemmatizer.class,
IxaLemmatizer.PARAM_VARIANT, aVariant,
IxaLemmatizer.PARAM_PRINT_TAGSET, true);
AnalysisEngineDescription engine = createEngineDescription(tagger, lemmatizer);
JCas jcas = TestRunner.runTest(engine, aLanguage, aText);
return jcas;
}
@Rule
public DkproTestContext testContext = new DkproTestContext();
}