/*
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.ixa;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.util.JCasUtil.select;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.jcas.JCas;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations;
import de.tudarmstadt.ukp.dkpro.core.testing.AssumeResource;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner;
public class IxaPosTaggerTest
{
@Test
public void testBasque()
throws Exception
{
JCas jcas = runTest("eu", null, "Hau froga bat da .",
new String[] { "DET", "NOUN", "NUM", "VERB", "PUNCT" },
new String[] { "DET", "NOUN", "NUM", "VERB", "PUNCT" });
String[] posTags = { "ADJ", "ADP", "ADV", "AUX", "CONJ", "DET", "INTJ", "NOUN", "NUM",
"PART", "PRON", "PROPN", "PUNCT", "SYM", "VERB", "X" };
String[] unmappedPos = {};
AssertAnnotations.assertTagset(POS.class, "ud", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "ud", unmappedPos, jcas);
}
@Test
public void testDutch()
throws Exception
{
{
JCas jcas = runTest("nl", null, "Dit is een test .",
new String[] { "Pron", "V", "Art", "N", "Punc" },
new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" });
String[] posTags = { "Adj", "Adv", "Art", "Conj", "Int", "MWU", "Misc", "N", "Num",
"Prep", "Pron", "Punc", "V" };
String[] unmappedPos = {};
AssertAnnotations.assertTagset(POS.class, "alpino-ixa", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "alpino-ixa", unmappedPos, jcas);
}
{
JCas jcas = runTest("nl", "maxent-100-c5-autodict01-alpino", "Dit is een test .",
new String[] { "Pron", "V", "Art", "N", "Punc" },
new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" });
String[] posTags = { "Adj", "Adv", "Art", "Conj", "Int", "MWU", "Misc", "N", "Num",
"Prep", "Pron", "Punc", "V" };
String[] unmappedPos = {};
AssertAnnotations.assertTagset(POS.class, "alpino-ixa", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "alpino-ixa", unmappedPos, jcas);
}
}
@Test
public void testEnglish()
throws Exception
{
JCas jcas = runTest("en", null, "This is a test .",
new String[] { "DT", "VBZ", "DT", "NN", "." },
new String[] { "DET", "VERB", "DET", "NOUN", "PUNCT" });
String[] posTags = { "#", "$", "''", "(", ")", ",", ".", ":", "CC", "CD", "DT", "EX", "FW",
"HYPH", "IN", "JJ", "JJR", "JJS", "LS", "MD", "NIL", "NN", "NNP", "NNPS", "NNS",
"PDT", "POS", "PRF", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH",
"VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB", "``",
"comic_strip" };
String[] unmappedPos = { "HYPH", "NIL", "PRF", "comic_strip" };
AssertAnnotations.assertTagset(POS.class, "ptb", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "ptb", unmappedPos, jcas);
runTest("en", null, "A neural net .",
new String[] { "DT", "JJ", "NN", "." },
new String[] { "DET", "ADJ", "NOUN", "PUNCT" });
runTest("en", null, "John is purchasing oranges .",
new String[] { "NNP", "VBZ", "VBG", "NNS", "." },
new String[] { "PROPN", "VERB", "VERB", "NOUN", "PUNCT" });
runTest("en", null, "The quick brown fox jumps over the lazy dog . \n",
new String[] { "DT", "JJ", "JJ", "NN", "VBZ", "IN", "DT", "JJ", "NN", "." },
new String[] { "DET", "ADJ", "ADJ", "NOUN", "VERB", "ADP", "DET", "ADJ", "NOUN",
"PUNCT" });
}
@Test
public void testFrench()
throws Exception
{
JCas jcas = runTest("fr", null, "C'est un test .",
new String[] { "V", "DET", "NC", "PONCT" },
new String[] { "VERB", "DET", "NOUN", "PUNCT" });
String[] posTags = { "ADJ", "ADJWH", "ADV", "ADVWH", "CC", "CLO", "CLR", "CLS", "CS", "DET",
"DETWH", "ET", "I", "NC", "NPP", "P", "P+D", "P+PRO", "PONCT", "PREF", "PRO",
"PROREL", "PROWH", "V", "VIMP", "VINF", "VPP", "VPR", "VS" };
String[] unmappedPos = {};
AssertAnnotations.assertTagset(POS.class, "melt", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "melt", unmappedPos, jcas);
}
@Test
public void testGalician()
throws Exception
{
JCas jcas = runTest("gl", null, "Este é un exame .",
new String[] { "DMS", "VIP3S00", "IMS0", "NCMS0", "Fp" },
new String[] { "POS", "POS", "POS", "POS", "POS" });
String[] posTags = { "A0CN", "A0CP", "A0CS", "A0FP", "A0FS", "A0MP", "A0MS", "AAMP",
"AQ0CS0", "CC", "CS", "DFP", "DFS", "DMP", "DMS", "DNS", "E", "Faa", "Fat", "Fc",
"Fca", "Fct", "Fd", "Fe", "Fg", "Fia", "Fit", "Fp", "Fpa", "Fpt", "Fra", "Frc",
"Ft", "Fx", "Fz", "GCP", "GCS", "GFP", "GFS", "GMP", "GMS", "IFP0", "IFS0", "IMP0",
"IMS0", "IMSA", "INP0", "INS0", "L", "MC0CN", "MC0FN", "MC0MN", "MO0FP", "MO0FS",
"MO0MP", "MO0MS", "MP0FS", "MP0MS", "NCCP0", "NCCS0", "NCFP0", "NCFPA", "NCFS0",
"NCFSA", "NCMN0", "NCMP0", "NCMPA", "NCMS0", "NP000", "O", "P1CPB0", "P1CPC0",
"P1CSC0", "P1CSN0", "P1CSO0", "P2CPB0", "P2CPC0", "P2CSA0", "P2CSB0", "P2CSD0",
"P3CNB0", "P3CNO0", "P3CNR0", "P3CPD0", "P3CSBP", "P3CSD0", "P3FPA0", "P3FPB0",
"P3FSA0", "P3FSB0", "P3MPA0", "P3MPB0", "P3MSA0", "P3MSB0", "QCN0", "QCP0", "QCS0",
"QFP0", "QFS0", "QMP0", "QMS0", "R0", "S", "TCN0", "TCP0", "TCS0", "TFS0", "TMP0",
"TMS0", "U", "VIA1P00", "VIA2S00", "VIA3P00", "VIA3S00", "VIC1P00", "VIC2P00",
"VIC2S00", "VIC3P00", "VIC3S00", "VIF1P00", "VIF1S00", "VIF2P00", "VIF2S00",
"VIF3P00", "VIF3S00", "VII1P00", "VII2S00", "VII3P00", "VII3S00", "VIP1P00",
"VIP1P00kkk#o#ferro", "VIP1S00", "VIP2P00", "VIP2S00", "VIP3P00", "VIP3S00",
"VIS1P00", "VIS1S00", "VIS2P00", "VIS2S00", "VIS3P00", "VIS3S00",
"VIS3S00kkk#de#sorte", "VM02P00", "VM02S00", "VN00000", "VN00000kkk#o#ferro",
"VN01P00", "VN02S00", "VN03P00", "VP00PC0", "VP00PF0", "VP00PM0", "VP00SC0",
"VP00SF0", "VP00SM0", "VP3S00kkk#o#ferro", "VSF2S00", "VSF3S00", "VSI1P00",
"VSI2S00", "VSI3P00", "VSI3S00", "VSP1P00", "VSP2P00", "VSP2S00", "VSP3P00",
"VSP3S00", "VX00000", "X1FPP", "X1FSP", "X1FSS", "X1MPP", "X1MPS", "X1MSP", "X1MSS",
"X2FPP", "X2FPS", "X2FSP", "X2FSS", "X2MPS", "X2MSP", "X2MSS", "X3FP0", "X3FS0",
"X3MP0", "X3MS0", "Y", "Z", "Z00", "Zkkk", "explanaciónkkkNCFS0" };
String[] unmappedPos = {};
AssertAnnotations.assertTagset(POS.class, "ctag-ixa", posTags, jcas);
// AssertAnnotations.assertTagsetMapping(POS.class, "melt", unmappedPos, jcas);
}
@Test
public void testGerman()
throws Exception
{
JCas jcas = runTest("de", null, "Dies ist ein Test .",
new String[] { "PDS", "VAFIN", "ART", "NN", "$." },
new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" });
String[] posTags = { "$(", "$,", "$.", "--", "ADJA", "ADJD", "ADV", "APPO", "APPR",
"APPRART", "APZR", "ART", "CARD", "FM", "ITJ", "KOKOM", "KON", "KOUI", "KOUS", "NE",
"NN", "NNE", "PDAT", "PDS", "PIAT", "PIS", "PPER", "PPOSAT", "PPOSS", "PRELAT",
"PRELS", "PRF", "PROAV", "PTKA", "PTKANT", "PTKNEG", "PTKVZ", "PTKZU", "PWAT",
"PWAV", "PWS", "TRUNC", "UNKNOWN", "VAFIN", "VAIMP", "VAINF", "VAPP", "VMFIN",
"VMINF", "VMPP", "VVFIN", "VVIMP", "VVINF", "VVIZU", "VVPP", "XY" };
String[] unmappedPos = { "--", "NNE", "UNKNOWN" };
AssertAnnotations.assertTagset(POS.class, "stts", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "stts", unmappedPos, jcas);
}
@Test
public void testItalian()
throws Exception
{
JCas jcas = runTest("it", null, "Questa è una prova .",
new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT"},
new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" });
String[] posTags = { "ADJ", "ADP", "ADV", "AUX", "CONJ", "DET", "INTJ", "NOUN", "NUM",
"PART", "PRON", "PROPN", "PUNCT", "SCONJ", "SYM", "VERB", "X" };
String[] unmappedPos = {};
AssertAnnotations.assertTagset(POS.class, "ud", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "ud", unmappedPos, jcas);
}
@Test
public void testEnglishExtra()
throws Exception
{
{
JCas jcas = runTest("en", "maxent-100-c5-baseline-autodict01-conll09", "This is a test .",
new String[] { "DT", "VBZ", "DT", "NN", "." },
new String[] { "DET", "VERB", "DET", "NOUN", "PUNCT" });
String[] posTags = { "#", "$", "''", "(", ")", ",", ".", ":", "CC", "CD", "DT", "EX",
"FW", "HYPH", "IN", "JJ", "JJR", "JJS", "LS", "MD", "NIL", "NN", "NNP", "NNPS",
"NNS", "PDT", "POS", "PRF", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM",
"TO", "UH", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB",
"``", "comic_strip" };
String[] unmappedPos = { "HYPH", "NIL", "PRF", "comic_strip" };
AssertAnnotations.assertTagset(POS.class, "ptb", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "ptb", unmappedPos, jcas);
}
{
JCas jcas = runTest("en", "perceptron-autodict01-ud", "This is a test .",
new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" },
new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" });
String[] posTags = { "ADJ", "ADP", "ADV", "AUX", "CONJ", "DET", "INTJ", "NOUN", "NUM",
"PART", "PRON", "PROPN", "PUNCT", "SCONJ", "SYM", "VERB", "X" };
String[] unmappedPos = {};
AssertAnnotations.assertTagset(POS.class, "ud", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "ud", unmappedPos, jcas);
}
{
JCas jcas = runTest("en", "xpos-perceptron-autodict01-ud", "This is a test .",
new String[] { "DT", "VBZ", "DT", "NN", "." },
new String[] { "DET", "VERB", "DET", "NOUN", "PUNCT" });
String[] posTags = { "$", "''", ",", "-LRB-", "-RRB-", ".", ":", "ADD", "AFX", "CC",
"CD", "DT", "EX", "FW", "GW", "HYPH", "IN", "JJ", "JJR", "JJS", "LS", "MD",
"NFP", "NN", "NNP", "NNPS", "NNS", "PDT", "POS", "PRP", "PRP$", "RB", "RBR",
"RBS", "RP", "SYM", "TO", "UH", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT",
"WP", "WP$", "WRB", "XX", "``" };
String[] unmappedPos = { "ADD", "AFX", "GW", "HYPH", "NFP", "XX" };
AssertAnnotations.assertTagset(POS.class, "ptb", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "ptb", unmappedPos, jcas);
}
}
@Test
public void testSpanish()
throws Exception
{
JCas jcas = runTest("es", null, "Esta es una prueba .",
new String[] { "PD0FS000", "VSIP3S0", "DI0FS0", "NCFS000", "Fp" },
new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" });
String[] posTags = { "A00000", "AO0FP0", "AO0FS0", "AO0MP0", "AO0MS0", "AQ0000", "AQ000P",
"AQ0CC0", "AQ0CP0", "AQ0CS0", "AQ0FP0", "AQ0FPP", "AQ0FS0", "AQ0FSP", "AQ0MP0",
"AQ0MPP", "AQ0MS0", "AQ0MSP", "CC", "CS", "DA0CS0", "DA0FP0", "DA0FS0", "DA0MC0",
"DA0MP0", "DA0MS0", "DD0CP0", "DD0CS0", "DD0FP0", "DD0FS0", "DD0MP0", "DD0MS0",
"DE0CC0", "DI0CP0", "DI0CS0", "DI0FP0", "DI0FS0", "DI0MP0", "DI0MS0", "DN0CP0",
"DN0CS0", "DN0FP0", "DN0FS0", "DN0MP0", "DN0MS0", "DP1CPS", "DP1CSS", "DP1FPP",
"DP1FSP", "DP1MPP", "DP1MSP", "DP1MSS", "DP2CPS", "DP2CSS", "DP2FSP", "DP3CP0",
"DP3CS0", "DP3FS0", "DP3MP0", "DP3MS0", "DT0CC0", "DT0FS0", "DT0MP0", "DT0MS0",
"F0", "Faa", "Fat", "Fc", "Fd", "Fe", "Fg", "Fh", "Fia", "Fit", "Fp", "Fpa", "Fpt",
"Fs", "Fx", "Fz", "I", "N000000", "NCCC000", "NCCP000", "NCCS000", "NCF0000",
"NCFC000", "NCFP000", "NCFS000", "NCMC000", "NCMP000", "NCMS000", "NP00000",
"NPCC000", "P0000000", "P00CC000", "P01CP000", "P01CS000", "P02CS000", "P03CC000",
"PD0CP000", "PD0CS000", "PD0FP000", "PD0FS000", "PD0MP000", "PD0MS000", "PE0CC000",
"PI0CC000", "PI0CP000", "PI0CS000", "PI0FP000", "PI0FS000", "PI0MP000", "PI0MS000",
"PN0CP000", "PN0CS000", "PN0FP000", "PN0FS000", "PN0MP000", "PN0MS000", "PP1CC000",
"PP1CP000", "PP1CS000", "PP1CSN00", "PP1CSO00", "PP1FS000", "PP1MP000", "PP2CP000",
"PP2CP00P", "PP2CS000", "PP2CS00P", "PP2CSN00", "PP2CSO00", "PP3CC000", "PP3CCA00",
"PP3CCO00", "PP3CP000", "PP3CPA00", "PP3CPD00", "PP3CS000", "PP3CSA00", "PP3CSD00",
"PP3FP000", "PP3FPA00", "PP3FS000", "PP3FSA00", "PP3MP000", "PP3MPA00", "PP3MS000",
"PP3MSA00", "PR000000", "PR0CC000", "PR0CP000", "PR0CS000", "PR0FP000", "PR0FS000",
"PR0MP000", "PR0MS000", "PT000000", "PT0CC000", "PT0CP000", "PT0CS000", "PT0FP000",
"PT0MP000", "PT0MS000", "PX1FP0P0", "PX1FS0P0", "PX1FS0S0", "PX1MP0P0", "PX1MS0P0",
"PX1MS0S0", "PX2FS0S0", "PX2MS0S0", "PX3CS000", "PX3FP000", "PX3FS000", "PX3MP000",
"PX3MS000", "RG", "RN", "SPCMS", "SPS00", "SPSCC", "VAG0000", "VAIC1P0", "VAIC3P0",
"VAIC3S0", "VAIF1P0", "VAIF1S0", "VAIF2S0", "VAIF3P0", "VAIF3S0", "VAII1P0",
"VAII1S0", "VAII2S0", "VAII3P0", "VAII3S0", "VAII3SC", "VAIP1P0", "VAIP1S0",
"VAIP2P0", "VAIP2S0", "VAIP3P0", "VAIP3PC", "VAIP3S0", "VAIP3SC", "VAIS3P0",
"VAIS3S0", "VAM02S0", "VAM03S0", "VAN0000", "VAN00CC", "VAP00SM", "VASI1P0",
"VASI1S0", "VASI3P0", "VASI3S0", "VASP1S0", "VASP3P0", "VASP3S0", "VMG0000",
"VMIC1P0", "VMIC1S0", "VMIC2S0", "VMIC3P0", "VMIC3S0", "VMIF1P0", "VMIF1S0",
"VMIF2S0", "VMIF3P0", "VMIF3S0", "VMII1P0", "VMII1S0", "VMII2P0", "VMII2S0",
"VMII3P0", "VMII3S0", "VMII3SC", "VMIP1P0", "VMIP1S0", "VMIP1SC", "VMIP2P0",
"VMIP2S0", "VMIP300", "VMIP3P0", "VMIP3PC", "VMIP3S0", "VMIP3SC", "VMIS1P0",
"VMIS1S0", "VMIS2S0", "VMIS3P0", "VMIS3PC", "VMIS3S0", "VMIS3SC", "VMM01P0",
"VMM02S0", "VMM03P0", "VMM03S0", "VMN0000", "VMN00CC", "VMP00PF", "VMP00PM",
"VMP00SF", "VMP00SM", "VMPS0SM", "VMSI1P0", "VMSI1S0", "VMSI3P0", "VMSI3S0",
"VMSP1P0", "VMSP1S0", "VMSP2P0", "VMSP2S0", "VMSP3P0", "VMSP3S0", "VSG0000",
"VSIC1S0", "VSIC2S0", "VSIC3P0", "VSIC3S0", "VSIF1S0", "VSIF3P0", "VSIF3S0",
"VSII1P0", "VSII1S0", "VSII3P0", "VSII3S0", "VSIP1P0", "VSIP1S0", "VSIP2S0",
"VSIP3P0", "VSIP3S0", "VSIP3SC", "VSIS1S0", "VSIS3P0", "VSIS3S0", "VSM02S0",
"VSM03S0", "VSN0000", "VSP00SM", "VSSF3S0", "VSSI3P0", "VSSI3S0", "VSSP1S0",
"VSSP2S0", "VSSP3P0", "VSSP3S0", "W", "Z", "Zm", "Zp", "_" };
String[] unmappedPos = { "DA0MC0", "DT0FS0", "VAII2S0", "VAIP3PC", "VMII3SC", "VMIS3PC",
"VMPS0SM", "VSSF3S0", "_" };
AssertAnnotations.assertTagset(POS.class, "ancora-ixa", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "ancora-ixa", unmappedPos, jcas);
}
private JCas runTest(String language, String variant, String testDocument, String[] tags,
String[] tagClasses)
throws Exception
{
AssumeResource.assumeResource(IxaPosTagger.class, "tagger", language, variant);
AnalysisEngine engine = createEngine(IxaPosTagger.class,
IxaPosTagger.PARAM_VARIANT, variant,
IxaPosTagger.PARAM_PRINT_TAGSET, true);
JCas jcas = TestRunner.runTest(engine, language, testDocument);
AssertAnnotations.assertPOS(tagClasses, tags, select(jcas, POS.class));
return jcas;
}
@Rule
public DkproTestContext testContext = new DkproTestContext();
}