/* * Copyright 2010 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.treetagger; import static org.apache.commons.lang.StringUtils.repeat; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; import static org.apache.uima.fit.util.JCasUtil.select; import static org.junit.Assert.assertEquals; import java.util.ArrayList; import java.util.List; import org.annolab.tt4j.TreeTaggerWrapper; import org.apache.commons.lang.StringUtils; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.fit.factory.JCasBuilder; import org.apache.uima.fit.testing.util.HideOutput; import org.apache.uima.jcas.JCas; import org.junit.Assume; import org.junit.Before; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations; import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner; public class TreeTaggerPosTaggerTest { @Before public void initTrace() { // TreeTaggerWrapper.TRACE = true; } @Test public void testEnglish() throws Exception { String[] tagset = { "#", "$", "''", "(", ")", ",", ":", "CC", "CD", "DT", "EX", "FW", "IN", "IN/that", "JJ", "JJR", "JJS", "LS", "MD", "NN", "NNS", "NP", "NPS", "PDT", "POS", "PP", "PP$", "RB", "RBR", "RBS", "RP", "SENT", "SYM", "TO", "UH", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "VH", "VHD", "VHG", "VHN", "VHP", "VHZ", "VV", "VVD", "VVG", "VVN", "VVP", "VVZ", "WDT", "WP", "WP$", "WRB", "``" }; runTest("en", "ptb-tt", tagset, "This is a test .", new String[] { "this", "be", "a", "test", "." }, new String[] { "DT", "VBZ", "DT", "NN", "SENT" }, new String[] { "DET", "VERB", "DET", "NOUN", "PUNCT" }); runTest("en", "ptb-tt", tagset, "A neural net .", new String[] { "a", "neural", "net", "." }, new String[] { "DT", "JJ", "NN", "SENT" }, new String[] { "DET", "ADJ", "NOUN", "PUNCT" }); runTest("en", "ptb-tt", tagset, "John is purchasing oranges .", new String[] { "John", "be", "purchase", "orange", "." }, new String[] { "NP", "VBZ", "VVG", "NNS", "SENT" }, new String[] { "PROPN", "VERB", "VERB", "NOUN", "PUNCT" }); // TT4J per default runs TreeTagger with the -sgml option, so XML tags are not tagged runTest("en", "ptb-tt", tagset, "My homepage is <url> http://null.dummy </url> .", new String[] { "my", "homepage", "be", "http://null.dummy", "." }, new String[] { "PP$", "NN", "VBZ", "JJ", "SENT" }, new String[] { "PRON", "NOUN", "VERB", "ADJ", "PUNCT" }); } @Test public void testFrench() throws Exception { String[] tagset = { "ABR", "ADJ", "ADV", "DET:ART", "DET:POS", "INT", "KON", "NAM", "NOM", "NUM", "PRO", "PRO:DEM", "PRO:IND", "PRO:PER", "PRO:POS", "PRO:REL", "PRP", "PRP:det", "PUN", "PUN:cit", "SENT", "SYM", "VER:cond", "VER:futu", "VER:impe", "VER:impf", "VER:infi", "VER:pper", "VER:ppre", "VER:pres", "VER:simp", "VER:subi", "VER:subp" }; runTest("fr", "stein", tagset, "Ceci est un test .", new String[] { "ceci", "être", "un", "test", "." }, new String[] { "PRO:DEM", "VER:pres", "DET:ART", "NOM", "SENT" }, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); } @Test public void testGerman() throws Exception { String[] tagset = { "$(", "$,", "$.", "ADJ", "ADJA", "ADJD", "ADV", "APPO", "APPR", "APPRART", "APZR", "ART", "CARD", "FM", "ITJ", "KOKOM", "KON", "KOUI", "KOUS", "NE", "NN", "PAV", "PDAT", "PDS", "PIAT", "PIS", "PPER", "PPOSAT", "PPOSS", "PRELAT", "PRELS", "PRF", "PTKA", "PTKANT", "PTKNEG", "PTKVZ", "PTKZU", "PWAT", "PWAV", "PWS", "TRUNC", "VAFIN", "VAIMP", "VAINF", "VAPP", "VMFIN", "VMINF", "VMPP", "VVFIN", "VVIMP", "VVINF", "VVIZU", "VVPP", "XY" }; runTest("de", "stts", tagset, "10 Minuten sind das Mikro an und die Bühne frei .", new String[] { "10", "Minute", "sein", "die", "Mikro", "an", "und", "die", "Bühne", "frei", "." }, new String[] { "CARD", "NN", "VAFIN", "ART", "NN", "PTKVZ", "KON", "ART", "NN", "PTKVZ", "$." }, new String[] { "NUM", "NOUN", "VERB", "DET", "NOUN", "VERB", "CONJ", "DET", "NOUN", "VERB", "PUNCT" }); runTest("de", "stts", tagset, "Das ist ein Test .", new String[] { "die", "sein", "eine", "Test", "." }, new String[] { "PDS", "VAFIN", "ART", "NN", "$." }, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); } @Test public void testDutch() throws Exception { String[] tagset = { "$.", "adj", "adj*kop", "adjabbr", "adv", "advabbr", "conjcoord", "conjsubo", "det__art", "det__demo", "det__excl", "det__indef", "det__poss", "det__quest", "det__rel", "int", "noun*kop", "nounabbr", "nounpl", "nounprop", "nounsg", "num__card", "num__ord", "partte", "prep", "prepabbr", "pronadv", "prondemo", "pronindef", "pronpers", "pronposs", "pronquest", "pronrefl", "pronrel", "punc", "verbinf", "verbpapa", "verbpastpl", "verbpastsg", "verbpresp", "verbprespl", "verbpressg" }; runTest("nl", "tt", tagset, "Dit is een test .", new String[] { "dit", "zijn", "een", "test", "." }, new String[] { "prondemo", "verbpressg", "det__art", "nounsg", "$." }, new String[] { "POS", "POS", "POS", "POS", "POS" }); runTest("nl", "tt", tagset, "10 minuten op de microfoon en vrij podium .", new String[] { "@card@", "minuut", "op", "de", "microfoon", "en", "vrij", "podium", "." }, new String[] { "num__ord", "nounpl", "prep", "det__art", "nounsg", "conjcoord", "adj", "nounsg", "$." }, new String[] { "POS", "POS", "POS", "POS", "POS", "POS", "POS", "POS", "POS" }); } @Test public void testMongolian() throws Exception { String[] tagset = { "\"", "(", ")", ",", "-", ".", ":", "?", "@", "CC", "CD", "DC", "FR", "IN", "JJ", "NN", "NNP", "PR", "RB", "SX", "VB", "|" }; runTest("mn", "tt", tagset, "Энэ нь тест юм .", new String[] { "-", "-", "тест", "-", "-" }, new String[] { "PR", "SX", "NN", "DC", "." }, new String[] { "POS", "POS", "POS", "POS", "POS" }); } @Test public void testGalician() throws Exception { String[] tagset = { "A0aa", "A0ap", "A0as", "A0fa", "A0fp", "A0fs", "A0ma", "A0mp", "A0ms", "Acap", "Acas", "Acfp", "Acfs", "Acmp", "Acms", "Asap", "Asas", "Asfp", "Asfs", "Asmp", "Asms", "Cc", "Cs", "Cs+Ddfp", "Cs+Ddfs", "Cs+Ddmp", "Cs+Ddms", "Ddfp", "Ddfp+Spfp", "Ddfs", "Ddfs+Spfs", "Ddmp", "Ddmp+Spmp", "Ddms", "Ddms+Spms", "Difp", "Difs", "Dimp", "Dims", "Dims+Spms", "Edfp", "Edfs", "Edmp", "Edmp+Inmp", "Edms", "Enfp", "Enfs", "Enmp", "Enms", "Enns", "Gdaa", "Gdap", "Gdas", "Gdfp", "Gdfs", "Gdmp", "Gdms", "Gnaa", "Gnap", "Gnas", "Gnfp", "Gnfs", "Gnmp", "Gnms", "Iafp", "Iafs", "Iamp", "Iams", "Idap", "Idas", "Idfp", "Idfp+Ddfp", "Idfs", "Idmp", "Idmp+Ddmp", "Idms", "In00", "Inaa", "Inap", "Inas", "Infp", "Infs", "Inmp", "Inmp+Ddmp", "Inms", "La0", "Lcc", "Lcc+Ddfp", "Lcc+Ddfs", "Lcc+Ddmp", "Lcc+Ddms", "Lcs", "Lp0", "Lp0+Ddfp", "Lp0+Ddfs", "Lp0+Ddmp", "Lp0+Ddms", "Lp0+Difp", "Lp0+Difs", "Lp0+Dimp", "Lp0+Dims", "Lp0+Edfp", "Lp0+Edfs", "Lp0+Edmp", "Lp0+Enfs", "Lp0+Enmp", "Lp0+Enns", "Lp0+Idfp", "Lp0+Idmp", "Lp0+Ncdms", "Lp0+Sp00", "Md1pfp", "Md1pfs", "Md1pmp", "Md1pms", "Md1sfp", "Md1sfs", "Md1smp", "Md1sms", "Md2pfp", "Md2pfs", "Md2pmp", "Md2pms", "Md2sfp", "Md2sfs", "Md2smp", "Md2sms", "Md3afp", "Md3afs", "Md3amp", "Md3ams", "Md3pfp", "Md3pfs", "Md3pmp", "Md3pms", "Md3sfp", "Md3sfs", "Md3smp", "Md3sms", "Mn1pfp", "Mn1pfs", "Mn1pmp", "Mn1pms", "Mn1sfp", "Mn1sfs", "Mn1smp", "Mn1sms", "Mn2pfp", "Mn2pfs", "Mn2pmp", "Mn2pms", "Mn2sfp", "Mn2sfs", "Mn2smp", "Mn2sms", "Mn3afp", "Mn3afs", "Mn3amp", "Mn3ams", "Mn3pfp", "Mn3pfs", "Mn3pmp", "Mn3pms", "Mn3sfp", "Mn3sfs", "Mn3smp", "Mn3sms", "Ncdap", "Ncdfp", "Ncdfs", "Ncdmp", "Ncdms", "Ncnap", "Ncnfp", "Ncnfs", "Ncnmp", "Ncnms", "Nodfp", "Nodfs", "Nodmp", "Nodms", "Nonfp", "Nonfs", "Nonmp", "Nonms", "P", "P+Ddfp", "P+Ddfs", "P+Ddmp", "P+Ddms", "P+Difp", "P+Difs", "P+Dimp", "P+Dims", "P+Edfp", "P+Edfs", "P+Edmp", "P+Edmp+Inmp", "P+Edms", "P+Enfp", "P+Enfs", "P+Enmp", "P+Enms", "P+Enns", "P+Iafp", "P+Iamp", "P+Idfp", "P+Idfp+Ddfp", "P+Idfs", "P+Idmp", "P+Idms", "P+Infp", "P+Infs", "P+Inmp", "P+Inms", "P+Ncdfs", "P+Ncdms", "P+Ncnfs", "P+Rtp3fp", "P+Rtp3fs", "P+Rtp3mp", "P+Rtp3ms", "P+Sp00", "P+Spfp", "P+Spfs", "P+Spmp", "P+Spms", "P+Wn", "P-Rtp3mp", "Q", "Q!", "Q\"", "Q'", "Q(", "Q)", "Q,", "Q-", "Q.", "Q...", "Q/", "Q:", "Q;", "Q?", "Q[", "Q]", "Q_", "Q{", "Q}", "Q¡", "Q¿", "Raa1ap", "Raa1as", "Raa1fp", "Raa1fs", "Raa1mp", "Raa1ms", "Raa2ap", "Raa2as", "Raa2fp", "Raa2fs", "Raa2mp", "Raa2ms", "Raa3fp", "Raa3fs", "Raa3mp", "Raa3ms", "Rad1ap", "Rad1ap+Raa3ms", "Rad1as", "Rad1as+Raa3fs", "Rad1as+Raa3ms", "Rad1fp", "Rad1fs", "Rad1mp", "Rad1mp+Raa3fs", "Rad1ms", "Rad2ap", "Rad2ap+Raa3ms", "Rad2as", "Rad2fp", "Rad2fs", "Rad2mp", "Rad2mp+Raa3ms", "Rad2ms", "Rad3ap", "Rad3as", "Rad3as+Raa3ms", "Rad3fp", "Rad3fs", "Rad3fs+Raa3ms", "Rad3mp", "Rad3ms", "Rad3ms+Raa3fp", "Rad3ms+Raa3ms", "Raf1ap", "Raf1as", "Raf1fp", "Raf1fs", "Raf1mp", "Raf1ms", "Raf2ap", "Raf2as", "Raf2fp", "Raf2fs", "Raf2mp", "Raf2ms", "Rao3aa", "Rtn1ap", "Rtn1as", "Rtn1fp", "Rtn1fs", "Rtn1mp", "Rtn1ms", "Rtn2ap", "Rtn2as", "Rtn2fp", "Rtn2fs", "Rtn2mp", "Rtn2ms", "Rtn3ap", "Rtn3as", "Rtn3fp", "Rtn3fs", "Rtn3mp", "Rtn3ms", "Rtn3ns", "Rtp1ap", "Rtp1as", "Rtp1fp", "Rtp1fs", "Rtp1mp", "Rtp1ms", "Rtp2ap", "Rtp2as", "Rtp2fp", "Rtp2fs", "Rtp2mp", "Rtp2ms", "Rtp3aa", "Rtp3ap", "Rtp3as", "Rtp3fp", "Rtp3fs", "Rtp3mp", "Rtp3ms", "Rtp3ns", "SA0fs", "Scaa", "Scap", "Scas", "Scfa", "Scfp", "Scfs", "Scma", "Scmp", "Scms", "Sp00", "Spf0", "Spfp", "Spfs", "Spm0", "Spmp", "Spms", "Tdfp", "Tdfs", "Tdmp", "Tdms", "Tnaa", "Tnap", "Tnas", "Tnfp", "Tnfs", "Tnmp", "Tnms", "V0f000", "V0f000+Raa1ap", "V0f000+Raa1as", "V0f000+Raa1fp", "V0f000+Raa1mp", "V0f000+Raa1ms", "V0f000+Raa2ap", "V0f000+Raa2ms", "V0f000+Raa3fp", "V0f000+Raa3fs", "V0f000+Raa3mp", "V0f000+Raa3ms", "V0f000+Rad1ap", "V0f000+Rad1fs", "V0f000+Rad1mp", "V0f000+Rad1ms", "V0f000+Rad2ap", "V0f000+Rad3ap", "V0f000+Rad3as", "V0f000+Rad3as+Raa3fs", "V0f000+Rad3as+Raa3ms", "V0f000+Rad3fp", "V0f000+Rad3fs", "V0f000+Rad3fs+Raa3ms", "V0f000+Rad3mp", "V0f000+Rad3mp+Raa3mp", "V0f000+Rad3ms", "V0f000+Rad3ms+Raa3fp", "V0f000+Raf1ap", "V0f000+Raf1as", "V0f000+Raf2as", "V0f000+Raf2fp", "V0f000+Rao3aa", "V0f000+Rao3aa+Rad1ap", "V0f10p", "V0f10p+Raa1ap", "V0f10p+Raa3ms", "V0f20p", "V0f20s", "V0f30p", "V0f30p+Rad3fs", "V0f30p+Rao3aa", "V0m10p", "V0m20p", "V0m20p+Raa3ms", "V0m20p+Raf2ap", "V0m20s", "V0m20s+Raa2as", "V0m20s+Rad3ap", "V0m20s+Rad3mp", "V0m20s+Raf2as", "V0m20s+Raf2ms", "V0p0fp", "V0p0fs", "V0p0mp", "V0p0ms", "V0x000", "V0x000+Raa1ap", "V0x000+Raa3fp", "V0x000+Raa3fs", "V0x000+Raa3mp", "V0x000+Raa3ms", "V0x000+Rad1ap", "V0x000+Rad1as+Raa3mp", "V0x000+Rad3ap", "V0x000+Rad3as", "V0x000+Rad3fp", "V0x000+Rad3fs", "V0x000+Rad3mp", "V0x000+Rad3ms", "V0x000+Rad3ms+Raa3ms", "V0x000+Rao3aa", "V0x10p", "V0x20p", "Vci10p", "Vci10s", "Vci10s+Raa3mp", "Vci10s+Raa3ms", "Vci20p", "Vci20s", "Vci20s+Raa2as", "Vci30p", "Vci30p+Rad1ap", "Vci30p+Rao3aa", "Vci30s", "Vci30s+Raa3ms", "Vci30s+Rad1ap", "Vci30s+Rad1as", "Vci30s+Rad1ms", "Vci30s+Rad3ap", "Vci30s+Rad3as", "Vci30s+Rad3fs", "Vci30s+Rad3ms", "Vci30s+Rao3aa", "Vcia0s", "Vei10p", "Vei10p+Raa3ms", "Vei10p+Rad3ms", "Vei10p+Raf1ap", "Vei10s", "Vei10s+Raa1as", "Vei10s+Raa1ms", "Vei10s+Raa3fp", "Vei10s+Raa3fs", "Vei10s+Raa3mp", "Vei10s+Raa3ms", "Vei10s+Rad3as", "Vei10s+Rad3as+Raa3ms", "Vei10s+Rad3mp", "Vei10s+Rad3ms", "Vei10s+Raf1as", "Vei10s+Raf1ms", "Vei20p", "Vei20s", "Vei20s+Raa3ms", "Vei20s+Rad1as", "Vei20s+Raf2as", "Vei30p", "Vei30p+Raa1ap", "Vei30p+Raa1as", "Vei30p+Raa3fs", "Vei30p+Raa3ms", "Vei30p+Rad1as", "Vei30p+Rad3as", "Vei30p+Rad3fp", "Vei30p+Rad3fs", "Vei30p+Rad3mp", "Vei30p+Rad3ms", "Vei30p+Rao3aa", "Vei30p+Rao3aa+Rad3fp", "Vei30p+Rao3aa+Rad3fs", "Vei30s", "Vei30s+Raa1ap", "Vei30s+Raa1as", "Vei30s+Raa3as", "Vei30s+Raa3fp", "Vei30s+Raa3fs", "Vei30s+Raa3mp", "Vei30s+Raa3ms", "Vei30s+Rad1ap", "Vei30s+Rad1as", "Vei30s+Rad1fs", "Vei30s+Rad1ms", "Vei30s+Rad1ms+Raa3fp", "Vei30s+Rad3ap", "Vei30s+Rad3as", "Vei30s+Rad3fp", "Vei30s+Rad3fs", "Vei30s+Rad3mp", "Vei30s+Rad3ms", "Vei30s+Rao3aa", "Vei30s+Rao3aa+Rad3as", "Vei30s+Rao3aa+Rad3ms", "Ves10p", "Ves10s", "Ves20p", "Ves20s", "Ves30p", "Ves30s", "Vesa0s", "Vfi10p", "Vfi10p+Raa1ap", "Vfi10p+Raa3ms", "Vfi10p+Rad3fp", "Vfi10p+Raf1ap", "Vfi10s", "Vfi10s+Rad3mp", "Vfi20p", "Vfi20s", "Vfi30p", "Vfi30p+Rad3fs", "Vfi30p+Rad3mp", "Vfi30p+Rad3ms", "Vfi30p+Rad3ms+Raa3ms", "Vfi30p+Rao3aa", "Vfi30p+Rao3aa+Rad3as", "Vfi30s", "Vfi30s+Raa3fp", "Vfi30s+Raa3fs", "Vfi30s+Raa3mp", "Vfi30s+Raa3ms", "Vfi30s+Rad3as", "Vfi30s+Rad3fp", "Vfi30s+Rad3fs", "Vfi30s+Rad3mp", "Vfi30s+Rad3ms", "Vfi30s+Rao3aa", "Vfi30s+Rao3aa+Rad3as", "Vfi30s+Rao3aa+Rad3fs", "Vfs10p", "Vfs10s", "Vfs20p", "Vfs20s", "Vfs30p", "Vfs30s", "Vfsa0s", "Vii10p", "Vii10p+Raa3fs", "Vii10s", "Vii10s+Rad3ap", "Vii20p", "Vii20s", "Vii30p", "Vii30p+Raa3fp", "Vii30p+Raa3fs", "Vii30p+Rad1ms", "Vii30p+Rad3fp", "Vii30p+Rad3mp", "Vii30p+Rao3aa", "Vii30s", "Vii30s+Raa3fp", "Vii30s+Rad1ap", "Vii30s+Rad1as", "Vii30s+Rad3as", "Vii30s+Rad3fs", "Vii30s+Rad3mp", "Vii30s+Rad3ms", "Vii30s+Rao3aa", "Vii30s+Rao3aa+Rad3as", "Viia0s", "Vli10p", "Vli10s", "Vli20p", "Vli20s", "Vli30p", "Vli30p+Rad3as", "Vli30p+Rao3aa", "Vli30s", "Vli30s+Raa3ms", "Vli30s+Rad3as", "Vli30s+Rao3aa", "Vlia0s", "Vpi10p", "Vpi10p+Raa1ap", "Vpi10p+Raa2ap", "Vpi10p+Raa3fp", "Vpi10p+Raa3fs", "Vpi10p+Raa3mp", "Vpi10p+Raa3ms", "Vpi10p+Rad1ap", "Vpi10p+Rad2fs", "Vpi10p+Rad3mp", "Vpi10p+Raf1ap", "Vpi10p+Raf1fp", "Vpi10p+Raf1mp", "Vpi10s", "Vpi10s+Raa1as", "Vpi10s+Raa1ms", "Vpi10s+Raa3fp", "Vpi10s+Raa3fs", "Vpi10s+Raa3mp", "Vpi10s+Raa3ms", "Vpi10s+Rad1as", "Vpi10s+Rad3as", "Vpi10s+Rad3mp", "Vpi10s+Rad3ms", "Vpi10s+Raf1as", "Vpi10s+Raf1fs", "Vpi10s+Raf1ms", "Vpi20p", "Vpi20s", "Vpi20s+Raa2as", "Vpi20s+Raa3fs", "Vpi30p", "Vpi30p+Raa1ap", "Vpi30p+Raa3fp", "Vpi30p+Raa3fs", "Vpi30p+Raa3mp", "Vpi30p+Raa3ms", "Vpi30p+Rad1ap", "Vpi30p+Rad1as", "Vpi30p+Rad1mp", "Vpi30p+Rad3ap", "Vpi30p+Rad3as", "Vpi30p+Rad3fp", "Vpi30p+Rad3fs", "Vpi30p+Rad3mp", "Vpi30p+Rad3ms", "Vpi30p+Rao3aa", "Vpi30p+Rao3aa+Rad1ap", "Vpi30s", "Vpi30s+Raa1ap", "Vpi30s+Raa1as", "Vpi30s+Raa1mp", "Vpi30s+Raa2as", "Vpi30s+Raa3fp", "Vpi30s+Raa3fs", "Vpi30s+Raa3mp", "Vpi30s+Raa3ms", "Vpi30s+Rad1ap", "Vpi30s+Rad1ap+Raa3fs", "Vpi30s+Rad1as", "Vpi30s+Rad1fs", "Vpi30s+Rad1ms", "Vpi30s+Rad2as", "Vpi30s+Rad2fp", "Vpi30s+Rad3ap", "Vpi30s+Rad3as", "Vpi30s+Rad3fp", "Vpi30s+Rad3fs", "Vpi30s+Rad3mp", "Vpi30s+Rad3ms", "Vpi30s+Rao3aa", "Vpi30s+Rao3aa+Rad1ap", "Vpi30s+Rao3aa+Rad3ap", "Vpi30s+Rao3aa+Rad3fs", "Vpi30s+Rao3aa+Rad3mp", "Vpi30s+Rao3aa+Rad3ms", "Vps10p", "Vps10p+Raa1ap", "Vps10p+Raa3ms", "Vps10p+Raf1ap", "Vps10s", "Vps20p", "Vps20s", "Vps30p", "Vps30p+Rad3fs", "Vps30p+Rao3aa", "Vps30s", "Vps30s+Raa1ap", "Vps30s+Rad1as", "Vps30s+Rao3aa", "Vpsa0s", "Wg", "Wm", "Wn", "Wr", "Y", "Za00", "Zaas", "Zafp", "Zafs", "Zamp", "Zams", "Zams+Ncnms", "Zf00", "Zg00", "Zgaa", "Zgfa", "Zgfp", "Zgfs", "Zgma", "Zgmp", "Zgms", "Zo00", "Zs00", "Zs00+Ncdmp", "Zs00+Ncnmp", "Zs00+Ncnms" }; runTest("gl", "xiada", tagset, "Este é un exame .", new String[] { "este", "ser", "un", "exame", "." }, new String[] { "Enms", "Vpi30s", "Dims", "Scms", "Q." }, new String[] { "PRON", "VERB", "DET", "NOUN", "PUNCT" }); } @Test public void testPolish() throws Exception { String[] tagset = { "SENT", "adj:pl:acc:f:com", "adj:pl:acc:f:pos", "adj:pl:acc:f:sup", "adj:pl:acc:m1:com", "adj:pl:acc:m1:pos", "adj:pl:acc:m1:sup", "adj:pl:acc:m2:com", "adj:pl:acc:m2:pos", "adj:pl:acc:m2:sup", "adj:pl:acc:m3:com", "adj:pl:acc:m3:pos", "adj:pl:acc:m3:sup", "adj:pl:acc:n:com", "adj:pl:acc:n:pos", "adj:pl:acc:n:sup", "adj:pl:dat:f:com", "adj:pl:dat:f:pos", "adj:pl:dat:f:sup", "adj:pl:dat:m1:com", "adj:pl:dat:m1:pos", "adj:pl:dat:m1:sup", "adj:pl:dat:m2:pos", "adj:pl:dat:m3:com", "adj:pl:dat:m3:pos", "adj:pl:dat:n:pos", "adj:pl:dat:n:sup", "adj:pl:gen:f:com", "adj:pl:gen:f:pos", "adj:pl:gen:f:sup", "adj:pl:gen:m1:com", "adj:pl:gen:m1:pos", "adj:pl:gen:m1:sup", "adj:pl:gen:m2:com", "adj:pl:gen:m2:pos", "adj:pl:gen:m2:sup", "adj:pl:gen:m3:com", "adj:pl:gen:m3:pos", "adj:pl:gen:m3:sup", "adj:pl:gen:n:com", "adj:pl:gen:n:pos", "adj:pl:gen:n:sup", "adj:pl:inst:f:com", "adj:pl:inst:f:pos", "adj:pl:inst:f:sup", "adj:pl:inst:m1:com", "adj:pl:inst:m1:pos", "adj:pl:inst:m1:sup", "adj:pl:inst:m2:pos", "adj:pl:inst:m3:com", "adj:pl:inst:m3:pos", "adj:pl:inst:m3:sup", "adj:pl:inst:n:com", "adj:pl:inst:n:pos", "adj:pl:inst:n:sup", "adj:pl:loc:f:com", "adj:pl:loc:f:pos", "adj:pl:loc:f:sup", "adj:pl:loc:m1:com", "adj:pl:loc:m1:pos", "adj:pl:loc:m1:sup", "adj:pl:loc:m2:pos", "adj:pl:loc:m3:com", "adj:pl:loc:m3:pos", "adj:pl:loc:m3:sup", "adj:pl:loc:n:com", "adj:pl:loc:n:pos", "adj:pl:loc:n:sup", "adj:pl:nom:f:com", "adj:pl:nom:f:pos", "adj:pl:nom:f:sup", "adj:pl:nom:m1:com", "adj:pl:nom:m1:pos", "adj:pl:nom:m1:sup", "adj:pl:nom:m2:com", "adj:pl:nom:m2:pos", "adj:pl:nom:m2:sup", "adj:pl:nom:m3:com", "adj:pl:nom:m3:pos", "adj:pl:nom:m3:sup", "adj:pl:nom:n:com", "adj:pl:nom:n:pos", "adj:pl:nom:n:sup", "adj:sg:acc:f:com", "adj:sg:acc:f:pos", "adj:sg:acc:f:sup", "adj:sg:acc:m1:com", "adj:sg:acc:m1:pos", "adj:sg:acc:m1:sup", "adj:sg:acc:m2:com", "adj:sg:acc:m2:pos", "adj:sg:acc:m2:sup", "adj:sg:acc:m3:com", "adj:sg:acc:m3:pos", "adj:sg:acc:m3:sup", "adj:sg:acc:n:com", "adj:sg:acc:n:pos", "adj:sg:acc:n:sup", "adj:sg:dat:f:com", "adj:sg:dat:f:pos", "adj:sg:dat:f:sup", "adj:sg:dat:m1:com", "adj:sg:dat:m1:pos", "adj:sg:dat:m1:sup", "adj:sg:dat:m2:pos", "adj:sg:dat:m3:com", "adj:sg:dat:m3:pos", "adj:sg:dat:m3:sup", "adj:sg:dat:n:com", "adj:sg:dat:n:pos", "adj:sg:dat:n:sup", "adj:sg:gen:f:com", "adj:sg:gen:f:pos", "adj:sg:gen:f:sup", "adj:sg:gen:m1:com", "adj:sg:gen:m1:pos", "adj:sg:gen:m1:sup", "adj:sg:gen:m2:pos", "adj:sg:gen:m2:sup", "adj:sg:gen:m3:com", "adj:sg:gen:m3:pos", "adj:sg:gen:m3:sup", "adj:sg:gen:n:com", "adj:sg:gen:n:pos", "adj:sg:gen:n:sup", "adj:sg:inst:f:com", "adj:sg:inst:f:pos", "adj:sg:inst:f:sup", "adj:sg:inst:m1:com", "adj:sg:inst:m1:pos", "adj:sg:inst:m1:sup", "adj:sg:inst:m2:com", "adj:sg:inst:m2:pos", "adj:sg:inst:m2:sup", "adj:sg:inst:m3:com", "adj:sg:inst:m3:pos", "adj:sg:inst:m3:sup", "adj:sg:inst:n:com", "adj:sg:inst:n:pos", "adj:sg:inst:n:sup", "adj:sg:loc:f:com", "adj:sg:loc:f:pos", "adj:sg:loc:f:sup", "adj:sg:loc:m1:com", "adj:sg:loc:m1:pos", "adj:sg:loc:m1:sup", "adj:sg:loc:m2:com", "adj:sg:loc:m2:pos", "adj:sg:loc:m3:com", "adj:sg:loc:m3:pos", "adj:sg:loc:m3:sup", "adj:sg:loc:n:com", "adj:sg:loc:n:pos", "adj:sg:loc:n:sup", "adj:sg:nom:f:com", "adj:sg:nom:f:pos", "adj:sg:nom:f:sup", "adj:sg:nom:m1:com", "adj:sg:nom:m1:pos", "adj:sg:nom:m1:sup", "adj:sg:nom:m2:com", "adj:sg:nom:m2:pos", "adj:sg:nom:m2:sup", "adj:sg:nom:m3:com", "adj:sg:nom:m3:pos", "adj:sg:nom:m3:sup", "adj:sg:nom:n:com", "adj:sg:nom:n:pos", "adj:sg:nom:n:sup", "adj:sg:voc:f:pos", "adj:sg:voc:f:sup", "adj:sg:voc:m1:pos", "adj:sg:voc:m1:sup", "adj:sg:voc:m2:pos", "adj:sg:voc:m3:pos", "adj:sg:voc:n:pos", "adja", "adjc", "adjp", "adv", "adv:com", "adv:pos", "adv:sup", "aglt:pl:pri:imperf:nwok", "aglt:pl:pri:imperf:wok", "aglt:pl:sec:imperf:nwok", "aglt:sg:pri:imperf:nwok", "aglt:sg:pri:imperf:wok", "aglt:sg:sec:imperf:nwok", "aglt:sg:sec:imperf:wok", "aglt:sg:ter:imperf:nwok", "bedzie:pl:pri:imperf", "bedzie:pl:sec:imperf", "bedzie:pl:ter:imperf", "bedzie:sg:pri:imperf", "bedzie:sg:sec:imperf", "bedzie:sg:ter:imperf", "brev:npun", "brev:pun", "burk", "comp", "conj", "depr:pl:acc:m2", "depr:pl:nom:m2", "fin:pl:pri:imperf", "fin:pl:pri:perf", "fin:pl:sec:imperf", "fin:pl:sec:perf", "fin:pl:ter:imperf", "fin:pl:ter:perf", "fin:sg:pri:imperf", "fin:sg:pri:perf", "fin:sg:sec:imperf", "fin:sg:sec:perf", "fin:sg:ter:imperf", "fin:sg:ter:perf", "ger:pl:dat:n:perf:aff", "ger:pl:gen:n:imperf:aff", "ger:pl:gen:n:perf:aff", "ger:pl:inst:n:imperf:aff", "ger:pl:inst:n:perf:aff", "ger:pl:loc:n:imperf:aff", "ger:pl:nom:n:imperf:aff", "ger:pl:nom:n:perf:aff", "ger:sg:acc:n:imperf:aff", "ger:sg:acc:n:imperf:neg", "ger:sg:acc:n:perf:aff", "ger:sg:acc:n:perf:neg", "ger:sg:dat:n:imperf:aff", "ger:sg:dat:n:perf:aff", "ger:sg:gen:n:imperf:aff", "ger:sg:gen:n:imperf:neg", "ger:sg:gen:n:perf:aff", "ger:sg:gen:n:perf:neg", "ger:sg:inst:n:imperf:aff", "ger:sg:inst:n:imperf:neg", "ger:sg:inst:n:perf:aff", "ger:sg:inst:n:perf:neg", "ger:sg:loc:n:imperf:aff", "ger:sg:loc:n:imperf:neg", "ger:sg:loc:n:perf:aff", "ger:sg:loc:n:perf:neg", "ger:sg:nom:n:imperf:aff", "ger:sg:nom:n:imperf:neg", "ger:sg:nom:n:perf:aff", "ger:sg:nom:n:perf:neg", "imps:imperf", "imps:perf", "impt:pl:pri:imperf", "impt:pl:pri:perf", "impt:pl:sec:imperf", "impt:pl:sec:perf", "impt:sg:sec:imperf", "impt:sg:sec:perf", "inf:imperf", "inf:perf", "interj", "interp", "num:pl:acc:f:congr", "num:pl:acc:f:rec", "num:pl:acc:m1:congr", "num:pl:acc:m1:rec", "num:pl:acc:m2:congr", "num:pl:acc:m2:rec", "num:pl:acc:m3:congr", "num:pl:acc:m3:rec", "num:pl:acc:n:congr", "num:pl:acc:n:rec", "num:pl:dat:f:congr", "num:pl:dat:m1:congr", "num:pl:dat:m2:congr", "num:pl:dat:m3:congr", "num:pl:dat:m3:rec", "num:pl:dat:n:congr", "num:pl:gen:f:congr", "num:pl:gen:f:rec", "num:pl:gen:m1:congr", "num:pl:gen:m1:rec", "num:pl:gen:m2:congr", "num:pl:gen:m2:rec", "num:pl:gen:m3:congr", "num:pl:gen:m3:rec", "num:pl:gen:n:congr", "num:pl:gen:n:rec", "num:pl:inst:f:congr", "num:pl:inst:m1:congr", "num:pl:inst:m2:congr", "num:pl:inst:m3:congr", "num:pl:inst:m3:rec", "num:pl:inst:n:congr", "num:pl:loc:f:congr", "num:pl:loc:f:rec", "num:pl:loc:m1:congr", "num:pl:loc:m2:congr", "num:pl:loc:m2:rec", "num:pl:loc:m3:congr", "num:pl:loc:m3:rec", "num:pl:loc:n:congr", "num:pl:nom:f:congr", "num:pl:nom:f:rec", "num:pl:nom:m1:congr", "num:pl:nom:m1:rec", "num:pl:nom:m2:congr", "num:pl:nom:m2:rec", "num:pl:nom:m3:congr", "num:pl:nom:m3:rec", "num:pl:nom:n:congr", "num:pl:nom:n:rec", "num:sg:acc:m3:rec", "num:sg:gen:m1:congr", "num:sg:gen:m3:congr", "num:sg:gen:m3:rec", "num:sg:nom:f:rec", "num:sg:nom:m3:congr", "num:sg:nom:m3:rec", "num:sg:nom:n:rec", "numcol:pl:acc:m1:rec", "numcol:pl:acc:n:rec", "numcol:pl:dat:m1:congr", "numcol:pl:gen:m1:congr", "numcol:pl:gen:m1:rec", "numcol:pl:gen:n:congr", "numcol:pl:gen:n:rec", "numcol:pl:inst:m1:rec", "numcol:pl:inst:n:rec", "numcol:pl:nom:m1:rec", "numcol:pl:nom:n:rec", "pact:pl:acc:f:imperf:aff", "pact:pl:acc:f:imperf:neg", "pact:pl:acc:m1:imperf:aff", "pact:pl:acc:m2:imperf:aff", "pact:pl:acc:m3:imperf:aff", "pact:pl:acc:m3:imperf:neg", "pact:pl:acc:n:imperf:aff", "pact:pl:acc:n:imperf:neg", "pact:pl:dat:f:imperf:aff", "pact:pl:dat:m1:imperf:aff", "pact:pl:dat:m2:imperf:aff", "pact:pl:dat:m3:imperf:aff", "pact:pl:dat:n:imperf:aff", "pact:pl:gen:f:imperf:aff", "pact:pl:gen:f:imperf:neg", "pact:pl:gen:m1:imperf:aff", "pact:pl:gen:m1:imperf:neg", "pact:pl:gen:m2:imperf:aff", "pact:pl:gen:m3:imperf:aff", "pact:pl:gen:m3:imperf:neg", "pact:pl:gen:n:imperf:aff", "pact:pl:inst:f:imperf:aff", "pact:pl:inst:m1:imperf:aff", "pact:pl:inst:m2:imperf:aff", "pact:pl:inst:m3:imperf:aff", "pact:pl:inst:m3:imperf:neg", "pact:pl:inst:n:imperf:aff", "pact:pl:inst:n:imperf:neg", "pact:pl:loc:f:imperf:aff", "pact:pl:loc:m1:imperf:aff", "pact:pl:loc:m3:imperf:aff", "pact:pl:loc:m3:imperf:neg", "pact:pl:loc:n:imperf:aff", "pact:pl:loc:n:imperf:neg", "pact:pl:nom:f:imperf:aff", "pact:pl:nom:f:imperf:neg", "pact:pl:nom:m1:imperf:aff", "pact:pl:nom:m2:imperf:aff", "pact:pl:nom:m3:imperf:aff", "pact:pl:nom:n:imperf:aff", "pact:pl:nom:n:imperf:neg", "pact:sg:acc:f:imperf:aff", "pact:sg:acc:f:imperf:neg", "pact:sg:acc:m1:imperf:aff", "pact:sg:acc:m2:imperf:aff", "pact:sg:acc:m3:imperf:aff", "pact:sg:acc:n:imperf:aff", "pact:sg:acc:n:imperf:neg", "pact:sg:dat:f:imperf:aff", "pact:sg:dat:m1:imperf:aff", "pact:sg:dat:m2:imperf:aff", "pact:sg:dat:m3:imperf:aff", "pact:sg:dat:n:imperf:aff", "pact:sg:gen:f:imperf:aff", "pact:sg:gen:f:imperf:neg", "pact:sg:gen:m1:imperf:aff", "pact:sg:gen:m1:imperf:neg", "pact:sg:gen:m2:imperf:aff", "pact:sg:gen:m3:imperf:aff", "pact:sg:gen:m3:imperf:neg", "pact:sg:gen:n:imperf:aff", "pact:sg:gen:n:imperf:neg", "pact:sg:inst:f:imperf:aff", "pact:sg:inst:f:imperf:neg", "pact:sg:inst:m1:imperf:aff", "pact:sg:inst:m1:imperf:neg", "pact:sg:inst:m2:imperf:aff", "pact:sg:inst:m2:imperf:neg", "pact:sg:inst:m3:imperf:aff", "pact:sg:inst:m3:imperf:neg", "pact:sg:inst:n:imperf:aff", "pact:sg:loc:f:imperf:aff", "pact:sg:loc:f:imperf:neg", "pact:sg:loc:m1:imperf:aff", "pact:sg:loc:m2:imperf:aff", "pact:sg:loc:m3:imperf:aff", "pact:sg:loc:m3:imperf:neg", "pact:sg:loc:n:imperf:aff", "pact:sg:loc:n:imperf:neg", "pact:sg:nom:f:imperf:aff", "pact:sg:nom:f:imperf:neg", "pact:sg:nom:m1:imperf:aff", "pact:sg:nom:m1:imperf:neg", "pact:sg:nom:m2:imperf:aff", "pact:sg:nom:m3:imperf:aff", "pact:sg:nom:m3:imperf:neg", "pact:sg:nom:n:imperf:aff", "pact:sg:nom:n:imperf:neg", "pact:sg:voc:m1:imperf:aff", "pant:perf", "pcon:imperf", "ppas:pl:acc:f:imperf:aff", "ppas:pl:acc:f:perf:aff", "ppas:pl:acc:f:perf:neg", "ppas:pl:acc:m1:imperf:aff", "ppas:pl:acc:m1:imperf:neg", "ppas:pl:acc:m1:perf:aff", "ppas:pl:acc:m2:imperf:aff", "ppas:pl:acc:m2:perf:aff", "ppas:pl:acc:m3:imperf:aff", "ppas:pl:acc:m3:perf:aff", "ppas:pl:acc:m3:perf:neg", "ppas:pl:acc:n:imperf:aff", "ppas:pl:acc:n:imperf:neg", "ppas:pl:acc:n:perf:aff", "ppas:pl:acc:n:perf:neg", "ppas:pl:dat:f:imperf:aff", "ppas:pl:dat:f:perf:aff", "ppas:pl:dat:f:perf:neg", "ppas:pl:dat:m1:imperf:aff", "ppas:pl:dat:m1:perf:aff", "ppas:pl:dat:m1:perf:neg", "ppas:pl:dat:m2:imperf:aff", "ppas:pl:dat:m3:imperf:aff", "ppas:pl:dat:m3:perf:aff", "ppas:pl:dat:n:imperf:aff", "ppas:pl:dat:n:perf:aff", "ppas:pl:gen:f:imperf:aff", "ppas:pl:gen:f:imperf:neg", "ppas:pl:gen:f:perf:aff", "ppas:pl:gen:f:perf:neg", "ppas:pl:gen:m1:imperf:aff", "ppas:pl:gen:m1:imperf:neg", "ppas:pl:gen:m1:perf:aff", "ppas:pl:gen:m1:perf:neg", "ppas:pl:gen:m2:imperf:aff", "ppas:pl:gen:m2:perf:aff", "ppas:pl:gen:m3:imperf:aff", "ppas:pl:gen:m3:imperf:neg", "ppas:pl:gen:m3:perf:aff", "ppas:pl:gen:m3:perf:neg", "ppas:pl:gen:n:imperf:aff", "ppas:pl:gen:n:perf:aff", "ppas:pl:gen:n:perf:neg", "ppas:pl:inst:f:imperf:aff", "ppas:pl:inst:f:perf:aff", "ppas:pl:inst:m1:imperf:aff", "ppas:pl:inst:m1:perf:aff", "ppas:pl:inst:m2:perf:aff", "ppas:pl:inst:m3:imperf:aff", "ppas:pl:inst:m3:perf:aff", "ppas:pl:inst:n:imperf:aff", "ppas:pl:inst:n:perf:aff", "ppas:pl:loc:f:imperf:aff", "ppas:pl:loc:f:imperf:neg", "ppas:pl:loc:f:perf:aff", "ppas:pl:loc:f:perf:neg", "ppas:pl:loc:m1:imperf:aff", "ppas:pl:loc:m1:perf:aff", "ppas:pl:loc:m2:imperf:aff", "ppas:pl:loc:m3:imperf:aff", "ppas:pl:loc:m3:perf:aff", "ppas:pl:loc:m3:perf:neg", "ppas:pl:loc:n:imperf:aff", "ppas:pl:loc:n:perf:aff", "ppas:pl:loc:n:perf:neg", "ppas:pl:nom:f:imperf:aff", "ppas:pl:nom:f:imperf:neg", "ppas:pl:nom:f:perf:aff", "ppas:pl:nom:f:perf:neg", "ppas:pl:nom:m1:imperf:aff", "ppas:pl:nom:m1:imperf:neg", "ppas:pl:nom:m1:perf:aff", "ppas:pl:nom:m1:perf:neg", "ppas:pl:nom:m2:imperf:aff", "ppas:pl:nom:m2:perf:aff", "ppas:pl:nom:m3:imperf:aff", "ppas:pl:nom:m3:imperf:neg", "ppas:pl:nom:m3:perf:aff", "ppas:pl:nom:m3:perf:neg", "ppas:pl:nom:n:imperf:aff", "ppas:pl:nom:n:perf:aff", "ppas:pl:nom:n:perf:neg", "ppas:sg:acc:f:imperf:aff", "ppas:sg:acc:f:imperf:neg", "ppas:sg:acc:f:perf:aff", "ppas:sg:acc:f:perf:neg", "ppas:sg:acc:m1:imperf:aff", "ppas:sg:acc:m1:perf:aff", "ppas:sg:acc:m2:imperf:aff", "ppas:sg:acc:m2:perf:aff", "ppas:sg:acc:m3:imperf:aff", "ppas:sg:acc:m3:imperf:neg", "ppas:sg:acc:m3:perf:aff", "ppas:sg:acc:m3:perf:neg", "ppas:sg:acc:n:imperf:aff", "ppas:sg:acc:n:perf:aff", "ppas:sg:acc:n:perf:neg", "ppas:sg:dat:f:imperf:aff", "ppas:sg:dat:f:imperf:neg", "ppas:sg:dat:f:perf:aff", "ppas:sg:dat:f:perf:neg", "ppas:sg:dat:m1:imperf:aff", "ppas:sg:dat:m1:perf:aff", "ppas:sg:dat:m3:imperf:aff", "ppas:sg:dat:m3:perf:aff", "ppas:sg:dat:n:perf:aff", "ppas:sg:gen:f:imperf:aff", "ppas:sg:gen:f:imperf:neg", "ppas:sg:gen:f:perf:aff", "ppas:sg:gen:f:perf:neg", "ppas:sg:gen:m1:imperf:aff", "ppas:sg:gen:m1:perf:aff", "ppas:sg:gen:m1:perf:neg", "ppas:sg:gen:m2:imperf:aff", "ppas:sg:gen:m2:perf:aff", "ppas:sg:gen:m3:imperf:aff", "ppas:sg:gen:m3:imperf:neg", "ppas:sg:gen:m3:perf:aff", "ppas:sg:gen:m3:perf:neg", "ppas:sg:gen:n:imperf:aff", "ppas:sg:gen:n:imperf:neg", "ppas:sg:gen:n:perf:aff", "ppas:sg:gen:n:perf:neg", "ppas:sg:inst:f:imperf:aff", "ppas:sg:inst:f:imperf:neg", "ppas:sg:inst:f:perf:aff", "ppas:sg:inst:f:perf:neg", "ppas:sg:inst:m1:imperf:aff", "ppas:sg:inst:m1:imperf:neg", "ppas:sg:inst:m1:perf:aff", "ppas:sg:inst:m1:perf:neg", "ppas:sg:inst:m2:imperf:aff", "ppas:sg:inst:m2:perf:aff", "ppas:sg:inst:m3:imperf:aff", "ppas:sg:inst:m3:imperf:neg", "ppas:sg:inst:m3:perf:aff", "ppas:sg:inst:m3:perf:neg", "ppas:sg:inst:n:imperf:aff", "ppas:sg:inst:n:imperf:neg", "ppas:sg:inst:n:perf:aff", "ppas:sg:inst:n:perf:neg", "ppas:sg:loc:f:imperf:aff", "ppas:sg:loc:f:perf:aff", "ppas:sg:loc:f:perf:neg", "ppas:sg:loc:m1:imperf:aff", "ppas:sg:loc:m1:perf:aff", "ppas:sg:loc:m2:imperf:aff", "ppas:sg:loc:m3:imperf:aff", "ppas:sg:loc:m3:imperf:neg", "ppas:sg:loc:m3:perf:aff", "ppas:sg:loc:m3:perf:neg", "ppas:sg:loc:n:imperf:aff", "ppas:sg:loc:n:perf:aff", "ppas:sg:loc:n:perf:neg", "ppas:sg:nom:f:imperf:aff", "ppas:sg:nom:f:imperf:neg", "ppas:sg:nom:f:perf:aff", "ppas:sg:nom:f:perf:neg", "ppas:sg:nom:m1:imperf:aff", "ppas:sg:nom:m1:imperf:neg", "ppas:sg:nom:m1:perf:aff", "ppas:sg:nom:m1:perf:neg", "ppas:sg:nom:m2:imperf:aff", "ppas:sg:nom:m2:perf:aff", "ppas:sg:nom:m3:imperf:aff", "ppas:sg:nom:m3:imperf:neg", "ppas:sg:nom:m3:perf:aff", "ppas:sg:nom:m3:perf:neg", "ppas:sg:nom:n:imperf:aff", "ppas:sg:nom:n:imperf:neg", "ppas:sg:nom:n:perf:aff", "ppas:sg:nom:n:perf:neg", "ppas:sg:voc:m2:imperf:aff", "ppron12:pl:acc:f:pri", "ppron12:pl:acc:f:sec", "ppron12:pl:acc:m1:pri", "ppron12:pl:acc:m1:sec", "ppron12:pl:acc:m2:sec", "ppron12:pl:acc:n:sec", "ppron12:pl:dat:f:pri", "ppron12:pl:dat:f:sec", "ppron12:pl:dat:m1:pri", "ppron12:pl:dat:m1:sec", "ppron12:pl:dat:m3:sec", "ppron12:pl:gen:f:pri", "ppron12:pl:gen:f:sec", "ppron12:pl:gen:m1:pri", "ppron12:pl:gen:m1:sec", "ppron12:pl:gen:m2:pri", "ppron12:pl:inst:f:pri", "ppron12:pl:inst:m1:pri", "ppron12:pl:inst:m1:sec", "ppron12:pl:inst:n:pri", "ppron12:pl:loc:f:sec", "ppron12:pl:loc:m1:pri", "ppron12:pl:loc:m1:sec", "ppron12:pl:loc:m3:sec", "ppron12:pl:nom:f:pri", "ppron12:pl:nom:f:sec", "ppron12:pl:nom:m1:pri", "ppron12:pl:nom:m1:pri:akc", "ppron12:pl:nom:m1:sec", "ppron12:pl:nom:m1:sec:akc", "ppron12:pl:nom:m2:pri", "ppron12:pl:nom:m2:sec", "ppron12:pl:nom:n:sec", "ppron12:sg:acc:f:pri:akc", "ppron12:sg:acc:f:sec:akc", "ppron12:sg:acc:f:sec:nakc", "ppron12:sg:acc:m1:pri:akc", "ppron12:sg:acc:m1:pri:nakc", "ppron12:sg:acc:m1:sec:akc", "ppron12:sg:acc:m1:sec:nakc", "ppron12:sg:acc:m2:pri:akc", "ppron12:sg:acc:m2:sec:nakc", "ppron12:sg:acc:m3:pri:akc", "ppron12:sg:acc:m3:sec:nakc", "ppron12:sg:acc:n:pri:akc", "ppron12:sg:acc:n:sec:nakc", "ppron12:sg:dat:f:pri:akc", "ppron12:sg:dat:f:pri:nakc", "ppron12:sg:dat:f:sec:akc", "ppron12:sg:dat:f:sec:nakc", "ppron12:sg:dat:m1:pri:akc", "ppron12:sg:dat:m1:pri:nakc", "ppron12:sg:dat:m1:sec:akc", "ppron12:sg:dat:m1:sec:nakc", "ppron12:sg:dat:m2:pri:nakc", "ppron12:sg:dat:m2:sec:akc", "ppron12:sg:dat:m2:sec:nakc", "ppron12:sg:gen:f:pri:akc", "ppron12:sg:gen:f:sec:akc", "ppron12:sg:gen:f:sec:nakc", "ppron12:sg:gen:m1:pri:akc", "ppron12:sg:gen:m1:sec:akc", "ppron12:sg:gen:m1:sec:nakc", "ppron12:sg:gen:m2:sec:akc", "ppron12:sg:gen:m2:sec:nakc", "ppron12:sg:gen:n:pri:akc", "ppron12:sg:inst:f:pri", "ppron12:sg:inst:f:sec", "ppron12:sg:inst:m1:pri", "ppron12:sg:inst:m1:pri:nakc", "ppron12:sg:inst:m1:sec", "ppron12:sg:inst:n:sec", "ppron12:sg:loc:f:pri", "ppron12:sg:loc:f:sec", "ppron12:sg:loc:m1:pri", "ppron12:sg:loc:m1:sec", "ppron12:sg:loc:m3:pri", "ppron12:sg:nom:f:pri", "ppron12:sg:nom:f:sec", "ppron12:sg:nom:m1:pri", "ppron12:sg:nom:m1:pri:akc", "ppron12:sg:nom:m1:pri:nakc", "ppron12:sg:nom:m1:sec", "ppron12:sg:nom:m1:sec:akc", "ppron12:sg:nom:m2:pri", "ppron12:sg:nom:m2:sec", "ppron12:sg:nom:m3:pri", "ppron12:sg:nom:m3:sec", "ppron12:sg:nom:n:sec", "ppron12:sg:voc:n:sec", "ppron3:pl:acc:f:ter:akc:npraep", "ppron3:pl:acc:f:ter:akc:praep", "ppron3:pl:acc:m1:ter:akc:npraep", "ppron3:pl:acc:m1:ter:akc:praep", "ppron3:pl:acc:m2:ter:akc:npraep", "ppron3:pl:acc:m2:ter:akc:praep", "ppron3:pl:acc:m3:ter:akc:npraep", "ppron3:pl:acc:m3:ter:akc:praep", "ppron3:pl:acc:n:ter:akc:npraep", "ppron3:pl:acc:n:ter:akc:praep", "ppron3:pl:dat:f:ter:akc:npraep", "ppron3:pl:dat:f:ter:akc:praep", "ppron3:pl:dat:m1:ter:akc:npraep", "ppron3:pl:dat:m1:ter:akc:praep", "ppron3:pl:dat:m2:ter:akc:npraep", "ppron3:pl:dat:m3:ter:akc:npraep", "ppron3:pl:dat:m3:ter:akc:praep", "ppron3:pl:dat:n:ter:akc:npraep", "ppron3:pl:gen:f:ter:akc:npraep", "ppron3:pl:gen:f:ter:akc:praep", "ppron3:pl:gen:m1:ter:akc:npraep", "ppron3:pl:gen:m1:ter:akc:praep", "ppron3:pl:gen:m2:ter:akc:npraep", "ppron3:pl:gen:m2:ter:akc:praep", "ppron3:pl:gen:m3:ter:akc:npraep", "ppron3:pl:gen:m3:ter:akc:praep", "ppron3:pl:gen:n:ter:akc:npraep", "ppron3:pl:gen:n:ter:akc:praep", "ppron3:pl:inst:f:ter:akc:npraep", "ppron3:pl:inst:f:ter:akc:praep", "ppron3:pl:inst:m1:ter:akc:npraep", "ppron3:pl:inst:m1:ter:akc:praep", "ppron3:pl:inst:m2:ter:akc:npraep", "ppron3:pl:inst:m2:ter:akc:praep", "ppron3:pl:inst:m3:ter:akc:npraep", "ppron3:pl:inst:m3:ter:akc:praep", "ppron3:pl:inst:n:ter:akc:npraep", "ppron3:pl:inst:n:ter:akc:praep", "ppron3:pl:loc:f:ter:akc:praep", "ppron3:pl:loc:m1:ter:akc:praep", "ppron3:pl:loc:m2:ter:akc:praep", "ppron3:pl:loc:m3:ter:akc:praep", "ppron3:pl:loc:n:ter:akc:praep", "ppron3:pl:nom:f:ter:akc:npraep", "ppron3:pl:nom:m1:ter:akc:npraep", "ppron3:pl:nom:m2:ter:akc:npraep", "ppron3:pl:nom:m3:ter:akc:npraep", "ppron3:pl:nom:n:ter:akc:npraep", "ppron3:sg:acc:f:ter:akc:npraep", "ppron3:sg:acc:f:ter:akc:praep", "ppron3:sg:acc:m1:ter:akc:npraep", "ppron3:sg:acc:m1:ter:akc:praep", "ppron3:sg:acc:m1:ter:nakc:npraep", "ppron3:sg:acc:m1:ter:nakc:praep", "ppron3:sg:acc:m2:ter:akc:praep", "ppron3:sg:acc:m2:ter:nakc:npraep", "ppron3:sg:acc:m2:ter:nakc:praep", "ppron3:sg:acc:m3:ter:akc:npraep", "ppron3:sg:acc:m3:ter:akc:praep", "ppron3:sg:acc:m3:ter:nakc:npraep", "ppron3:sg:acc:m3:ter:nakc:praep", "ppron3:sg:acc:n:ter:akc:npraep", "ppron3:sg:acc:n:ter:akc:praep", "ppron3:sg:dat:f:ter:akc:npraep", "ppron3:sg:dat:f:ter:akc:praep", "ppron3:sg:dat:m1:ter:akc:npraep", "ppron3:sg:dat:m1:ter:akc:praep", "ppron3:sg:dat:m1:ter:nakc:npraep", "ppron3:sg:dat:m2:ter:akc:npraep", "ppron3:sg:dat:m2:ter:nakc:npraep", "ppron3:sg:dat:m3:ter:akc:npraep", "ppron3:sg:dat:m3:ter:akc:praep", "ppron3:sg:dat:m3:ter:nakc:npraep", "ppron3:sg:dat:n:ter:akc:npraep", "ppron3:sg:dat:n:ter:akc:praep", "ppron3:sg:dat:n:ter:nakc:npraep", "ppron3:sg:gen:f:ter:akc:npraep", "ppron3:sg:gen:f:ter:akc:praep", "ppron3:sg:gen:m1:ter:akc:npraep", "ppron3:sg:gen:m1:ter:akc:praep", "ppron3:sg:gen:m1:ter:nakc:npraep", "ppron3:sg:gen:m1:ter:nakc:praep", "ppron3:sg:gen:m2:ter:akc:npraep", "ppron3:sg:gen:m2:ter:akc:praep", "ppron3:sg:gen:m2:ter:nakc:npraep", "ppron3:sg:gen:m3:ter:akc:npraep", "ppron3:sg:gen:m3:ter:akc:praep", "ppron3:sg:gen:m3:ter:nakc:npraep", "ppron3:sg:gen:m3:ter:nakc:praep", "ppron3:sg:gen:n:ter:akc:npraep", "ppron3:sg:gen:n:ter:akc:praep", "ppron3:sg:gen:n:ter:nakc:npraep", "ppron3:sg:inst:f:ter:akc:praep", "ppron3:sg:inst:m1:ter:akc:npraep", "ppron3:sg:inst:m1:ter:akc:praep", "ppron3:sg:inst:m2:ter:akc:npraep", "ppron3:sg:inst:m2:ter:akc:praep", "ppron3:sg:inst:m3:ter:akc:npraep", "ppron3:sg:inst:m3:ter:akc:praep", "ppron3:sg:inst:n:ter:akc:npraep", "ppron3:sg:inst:n:ter:akc:praep", "ppron3:sg:loc:f:ter:akc:praep", "ppron3:sg:loc:m1:ter:akc:praep", "ppron3:sg:loc:m2:ter:akc:praep", "ppron3:sg:loc:m3:ter:akc:praep", "ppron3:sg:loc:n:ter:akc:praep", "ppron3:sg:nom:f:ter:akc:npraep", "ppron3:sg:nom:f:ter:akc:praep", "ppron3:sg:nom:m1:ter:akc:npraep", "ppron3:sg:nom:m2:ter:akc:npraep", "ppron3:sg:nom:m2:ter:akc:praep", "ppron3:sg:nom:m3:ter:akc:npraep", "ppron3:sg:nom:n:ter:akc:npraep", "praet:pl:f:imperf", "praet:pl:f:perf", "praet:pl:m1:imperf", "praet:pl:m1:imperf:agl", "praet:pl:m1:perf", "praet:pl:m1:perf:nagl", "praet:pl:m2:imperf", "praet:pl:m2:perf", "praet:pl:m3:imperf", "praet:pl:m3:perf", "praet:pl:n:imperf", "praet:pl:n:perf", "praet:sg:f:imperf", "praet:sg:f:imperf:agl", "praet:sg:f:imperf:nagl", "praet:sg:f:perf", "praet:sg:m1:imperf", "praet:sg:m1:imperf:agl", "praet:sg:m1:imperf:nagl", "praet:sg:m1:perf", "praet:sg:m1:perf:agl", "praet:sg:m1:perf:nagl", "praet:sg:m2:imperf", "praet:sg:m2:imperf:nagl", "praet:sg:m2:perf", "praet:sg:m2:perf:nagl", "praet:sg:m3:imperf", "praet:sg:m3:imperf:nagl", "praet:sg:m3:perf", "praet:sg:m3:perf:nagl", "praet:sg:n:imperf", "praet:sg:n:perf", "pred", "prep:acc", "prep:acc:nwok", "prep:acc:wok", "prep:dat", "prep:gen", "prep:gen:nwok", "prep:gen:wok", "prep:inst", "prep:inst:nwok", "prep:inst:wok", "prep:loc", "prep:loc:nwok", "prep:loc:wok", "prep:nom", "qub", "qub:nwok", "qub:wok", "siebie:acc", "siebie:dat", "siebie:gen", "siebie:inst", "siebie:loc", "subst:pl:acc:f", "subst:pl:acc:m1", "subst:pl:acc:m2", "subst:pl:acc:m3", "subst:pl:acc:n", "subst:pl:dat:f", "subst:pl:dat:m1", "subst:pl:dat:m2", "subst:pl:dat:m3", "subst:pl:dat:n", "subst:pl:gen:f", "subst:pl:gen:m1", "subst:pl:gen:m2", "subst:pl:gen:m3", "subst:pl:gen:n", "subst:pl:inst:f", "subst:pl:inst:m1", "subst:pl:inst:m2", "subst:pl:inst:m3", "subst:pl:inst:n", "subst:pl:loc:f", "subst:pl:loc:m1", "subst:pl:loc:m2", "subst:pl:loc:m3", "subst:pl:loc:n", "subst:pl:nom:f", "subst:pl:nom:m1", "subst:pl:nom:m2", "subst:pl:nom:m3", "subst:pl:nom:n", "subst:sg:acc:f", "subst:sg:acc:m1", "subst:sg:acc:m2", "subst:sg:acc:m3", "subst:sg:acc:n", "subst:sg:dat:f", "subst:sg:dat:m1", "subst:sg:dat:m2", "subst:sg:dat:m3", "subst:sg:dat:n", "subst:sg:gen:f", "subst:sg:gen:m1", "subst:sg:gen:m2", "subst:sg:gen:m3", "subst:sg:gen:n", "subst:sg:inst:f", "subst:sg:inst:m1", "subst:sg:inst:m2", "subst:sg:inst:m3", "subst:sg:inst:n", "subst:sg:loc:f", "subst:sg:loc:m1", "subst:sg:loc:m2", "subst:sg:loc:m3", "subst:sg:loc:n", "subst:sg:nom:f", "subst:sg:nom:m1", "subst:sg:nom:m2", "subst:sg:nom:m3", "subst:sg:nom:n", "subst:sg:voc:f", "subst:sg:voc:m1", "subst:sg:voc:m2", "subst:sg:voc:m3", "subst:sg:voc:n", "winien:pl:f:imperf", "winien:pl:m1:imperf", "winien:pl:m2:imperf", "winien:pl:m3:imperf", "winien:pl:n:imperf", "winien:sg:f:imperf", "winien:sg:m1:imperf", "winien:sg:m2:imperf", "winien:sg:m3:imperf", "winien:sg:n:imperf", "xxx" }; runTest("pl", "ncp", tagset, "To badanie .", new String[] { "ten", "badanie", "." }, new String[] { "adj:sg:acc:n:pos", "subst:sg:acc:n", "SENT" }, new String[] { "ADJ", "NOUN", "PUNCT" }); } @Test public void testRussian() throws Exception { String[] tagset = { ",", "-", "Afcmsnf", "Afpfpgf", "Afpfsaf", "Afpfsas", "Afpfsdf", "Afpfsgf", "Afpfsif", "Afpfslf", "Afpfsnf", "Afpfsns", "Afpmpaf", "Afpmpdf", "Afpmpgf", "Afpmpif", "Afpmplf", "Afpmpnf", "Afpmpns", "Afpmsaf", "Afpmsdf", "Afpmsds", "Afpmsgf", "Afpmsgs", "Afpmsif", "Afpmslf", "Afpmsnf", "Afpmsns", "Afpnpaf", "Afpnpnf", "Afpnsaf", "Afpnsdf", "Afpnsgf", "Afpnsif", "Afpnslf", "Afpnsnf", "Afpnsns", "C", "I", "Mc", "Mc---d", "Mc--a", "Mc--ad", "Mc--d", "Mc--dd", "Mc--g", "Mc--gd", "Mc--i", "Mc--id", "Mc--l", "Mc--n", "Mcf-a", "Mcf-d", "Mcf-g", "Mcf-i", "Mcf-l", "Mcf-n", "Mcm-a", "Mcm-d", "Mcm-g", "Mcm-i", "Mcm-l", "Mcm-n", "Mcn-a", "Mcn-d", "Mcn-g", "Mcn-i", "Mcn-l", "Mcn-n", "Mo---d", "Mo--g", "Mo--i", "Mo-pa", "Mo-pad", "Mo-pd", "Mo-pdd", "Mo-pg", "Mo-pgd", "Mo-pi", "Mo-pid", "Mo-pl", "Mo-pld", "Mo-pn", "Mo-pnd", "Mo-sad", "Mof", "Mof-a", "Mof-d", "Mof-g", "Mof-i", "Mof-l", "Mof-n", "Mofsa", "Mofsad", "Mofsd", "Mofsdd", "Mofsg", "Mofsgd", "Mofsi", "Mofsid", "Mofsl", "Mofsld", "Mofsn", "Mofsnd", "Mom-a", "Mom-d", "Mom-g", "Mom-i", "Mom-l", "Mom-n", "Momsa", "Momsad", "Momsd", "Momsg", "Momsgd", "Momsi", "Momsid", "Momsl", "Momsld", "Momsn", "Momsnd", "Mon-a", "Mon-d", "Mon-g", "Mon-i", "Mon-l", "Mon-n", "Monsa", "Monsad", "Monsd", "Monsg", "Monsgd", "Monsi", "Monsid", "Monsl", "Monsn", "Monsnd", "Nccpay", "Nccpdy", "Nccpgy", "Nccpiy", "Nccply", "Nccpny", "Nccsay", "Nccsdy", "Nccsgn", "Nccsgy", "Nccsiy", "Nccsly", "Nccsnn", "Nccsny", "Ncfpan", "Ncfpay", "Ncfpdn", "Ncfpdy", "Ncfpgn", "Ncfpgy", "Ncfpin", "Ncfpiy", "Ncfpln", "Ncfply", "Ncfpnn", "Ncfpny", "Ncfsan", "Ncfsay", "Ncfsdn", "Ncfsdy", "Ncfsgn", "Ncfsgy", "Ncfsin", "Ncfsiy", "Ncfsln", "Ncfsly", "Ncfsnn", "Ncfsnnl", "Ncfsnnp", "Ncfsny", "Ncfsvy", "Ncmpan", "Ncmpay", "Ncmpdn", "Ncmpdy", "Ncmpgn", "Ncmpgy", "Ncmpin", "Ncmpiy", "Ncmpln", "Ncmply", "Ncmpnn", "Ncmpnnl", "Ncmpny", "Ncmsan", "Ncmsay", "Ncmsdn", "Ncmsdy", "Ncmsgn", "Ncmsgy", "Ncmsin", "Ncmsiy", "Ncmsln", "Ncmsly", "Ncmsnn", "Ncmsnnl", "Ncmsnnp", "Ncmsny", "Ncmsvn", "Ncmsvy", "Ncnpan", "Ncnpay", "Ncnpdn", "Ncnpdy", "Ncnpgn", "Ncnpgy", "Ncnpin", "Ncnpiy", "Ncnpln", "Ncnply", "Ncnpnn", "Ncnpny", "Ncnsan", "Ncnsay", "Ncnsdn", "Ncnsdy", "Ncnsgn", "Ncnsgy", "Ncnsin", "Ncnsiy", "Ncnsln", "Ncnsly", "Ncnsnn", "Ncnsny", "Npcpay", "Npcsay", "Npcsdy", "Npcsgy", "Npcsiy", "Npcsly", "Npcsnn", "Npcsny", "Npcsvy", "Npfpay", "Npfpdy", "Npfpgy", "Npfpiy", "Npfpny", "Npfsay", "Npfsdy", "Npfsgn", "Npfsgy", "Npfsiy", "Npfsly", "Npfsnn", "Npfsny", "Npfsvy", "Npmpay", "Npmpdy", "Npmpgy", "Npmpiy", "Npmpny", "Npmpvy", "Npmsay", "Npmsdn", "Npmsdy", "Npmsgn", "Npmsgy", "Npmsiy", "Npmsly", "Npmsnn", "Npmsny", "Npmsvy", "Npnsan", "Npnsnn", "P-----a", "P-----r", "P----an", "P----ar", "P----dn", "P----dr", "P----gn", "P----gr", "P----in", "P----ir", "P----ln", "P----nn", "P---p-a", "P---paa", "P---pan", "P---pda", "P---pdn", "P---pga", "P---pgn", "P---pia", "P---pin", "P---pla", "P---pln", "P---pna", "P---pnn", "P---san", "P---sar", "P---sdn", "P---sdr", "P---sga", "P---sgn", "P---sgr", "P---sia", "P---sin", "P---sir", "P---sln", "P---snn", "P--f-aa", "P--f-la", "P--fpaa", "P--fs-a", "P--fsaa", "P--fsan", "P--fsda", "P--fsdn", "P--fsga", "P--fsgn", "P--fsia", "P--fsin", "P--fsla", "P--fsln", "P--fsna", "P--fsnn", "P--m-aa", "P--m-ga", "P--m-ia", "P--m-la", "P--mpga", "P--ms-a", "P--msaa", "P--msan", "P--msda", "P--msdn", "P--msga", "P--msgn", "P--msia", "P--msin", "P--msla", "P--msln", "P--msna", "P--msnn", "P--n-an", "P--n-ga", "P--n-la", "P--n-na", "P--npan", "P--npgn", "P--npnn", "P--ns-a", "P--nsaa", "P--nsan", "P--nsda", "P--nsdn", "P--nsga", "P--nsgn", "P--nsia", "P--nsin", "P--nsla", "P--nsln", "P--nsna", "P--nsnn", "P-1-pan", "P-1-pdn", "P-1-pgn", "P-1-pin", "P-1-pln", "P-1-pnn", "P-1-san", "P-1-sdn", "P-1-sgn", "P-1-sin", "P-1-sln", "P-1-snn", "P-1nsnn", "P-2-pan", "P-2-pdn", "P-2-pgn", "P-2-pin", "P-2-pln", "P-2-pnn", "P-2-san", "P-2-sdn", "P-2-sgn", "P-2-sin", "P-2-sln", "P-2-snn", "P-2msdn", "P-2nsan", "P-3-pan", "P-3-pdn", "P-3-pgn", "P-3-pin", "P-3-pln", "P-3-pnn", "P-3-san", "P-3fsan", "P-3fsdn", "P-3fsgn", "P-3fsin", "P-3fsln", "P-3fsnn", "P-3msan", "P-3msdn", "P-3msgn", "P-3msin", "P-3msln", "P-3msnn", "P-3nsan", "P-3nsdn", "P-3nsgn", "P-3nsin", "P-3nsln", "P-3nsnn", "Q", "R", "Rc", "SENT", "Sp-a", "Sp-d", "Sp-g", "Sp-i", "Sp-l", "Sp-n", "Vmg----a-p", "Vmg----m-p", "Vmgp---a-e", "Vmgp---a-p", "Vmgp---m-e", "Vmgp---m-p", "Vmgs---a-e", "Vmgs---a-p", "Vmgs---m-e", "Vmgs---m-p", "Vmi-1--a-e", "Vmif1p-a-e", "Vmif1p-a-p", "Vmif1p-m-p", "Vmif1s-a-e", "Vmif1s-a-p", "Vmif1s-m-p", "Vmif2p-a-e", "Vmif2p-a-p", "Vmif2p-m-p", "Vmif2s-a-e", "Vmif2s-a-p", "Vmif2s-m-p", "Vmif3p-a-e", "Vmif3p-a-p", "Vmif3p-m-p", "Vmif3s-a-e", "Vmif3s-a-p", "Vmif3s-m-p", "Vmip---m-e", "Vmip1p-a-e", "Vmip1p-a-p", "Vmip1p-m-e", "Vmip1s-a-e", "Vmip1s-a-p", "Vmip1s-m-e", "Vmip2p-a-e", "Vmip2p-m-e", "Vmip2s-a-e", "Vmip2s-m-e", "Vmip3p-a-e", "Vmip3p-a-p", "Vmip3p-m-e", "Vmip3p-p-e", "Vmip3s-a-e", "Vmip3s-m-e", "Vmip3s-p-e", "Vmis---a-e", "Vmis---a-p", "Vmis---m-e", "Vmis--nm-e", "Vmis-p-a-e", "Vmis-p-a-p", "Vmis-p-m-e", "Vmis-p-m-p", "Vmis-p-p-e", "Vmis-s-a-e", "Vmis-s-a-p", "Vmis-sfa-e", "Vmis-sfa-p", "Vmis-sfm-e", "Vmis-sfm-p", "Vmis-sfp-e", "Vmis-sma-e", "Vmis-sma-p", "Vmis-smm-e", "Vmis-smm-p", "Vmis-smp-e", "Vmis-smp-p", "Vmis-sna-e", "Vmis-sna-p", "Vmis-snm-e", "Vmis-snm-p", "Vmis-snp-e", "Vmm--s-a-e", "Vmm-1p-a-e", "Vmm-1p-a-p", "Vmm-1p-m-p", "Vmm-1s-a-e", "Vmm-1s-a-p", "Vmm-1s-m-p", "Vmm-2--a-e", "Vmm-2--a-p", "Vmm-2p-a-e", "Vmm-2p-a-p", "Vmm-2p-m-e", "Vmm-2p-m-p", "Vmm-2s-a-e", "Vmm-2s-a-p", "Vmm-2s-m-e", "Vmm-2s-m-p", "Vmn----a-e", "Vmn----a-p", "Vmn----m-e", "Vmn----m-p", "Vmn----p-e", "Vmpp-p-a-ea", "Vmpp-p-a-ed", "Vmpp-p-a-eg", "Vmpp-p-a-ei", "Vmpp-p-a-el", "Vmpp-p-a-en", "Vmpp-p-afea", "Vmpp-p-afed", "Vmpp-p-afeg", "Vmpp-p-afei", "Vmpp-p-afel", "Vmpp-p-afen", "Vmpp-p-m-ea", "Vmpp-p-m-ed", "Vmpp-p-m-eg", "Vmpp-p-m-ei", "Vmpp-p-m-el", "Vmpp-p-m-en", "Vmpp-p-mfea", "Vmpp-p-mfed", "Vmpp-p-mfeg", "Vmpp-p-mfei", "Vmpp-p-mfel", "Vmpp-p-mfen", "Vmpp-p-p-ea", "Vmpp-p-p-ed", "Vmpp-p-p-eg", "Vmpp-p-p-en", "Vmpp-p-pfea", "Vmpp-p-pfed", "Vmpp-p-pfeg", "Vmpp-p-pfei", "Vmpp-p-pfel", "Vmpp-p-pfen", "Vmpp-p-pse", "Vmpp-pma-eg", "Vmpp-s-a-ei", "Vmpp-s-afei", "Vmpp-sfa-ea", "Vmpp-sfa-ed", "Vmpp-sfa-eg", "Vmpp-sfa-ei", "Vmpp-sfa-el", "Vmpp-sfa-en", "Vmpp-sfafea", "Vmpp-sfafed", "Vmpp-sfafeg", "Vmpp-sfafei", "Vmpp-sfafel", "Vmpp-sfafen", "Vmpp-sfm-ea", "Vmpp-sfm-ed", "Vmpp-sfm-eg", "Vmpp-sfm-ei", "Vmpp-sfm-el", "Vmpp-sfm-en", "Vmpp-sfmfea", "Vmpp-sfmfed", "Vmpp-sfmfeg", "Vmpp-sfmfei", "Vmpp-sfmfel", "Vmpp-sfmfen", "Vmpp-sfp-ea", "Vmpp-sfp-eg", "Vmpp-sfp-ei", "Vmpp-sfp-el", "Vmpp-sfp-en", "Vmpp-sfpfea", "Vmpp-sfpfed", "Vmpp-sfpfeg", "Vmpp-sfpfei", "Vmpp-sfpfel", "Vmpp-sfpfen", "Vmpp-sfpse", "Vmpp-sma-ea", "Vmpp-sma-ed", "Vmpp-sma-eg", "Vmpp-sma-ei", "Vmpp-sma-el", "Vmpp-sma-en", "Vmpp-smafea", "Vmpp-smafed", "Vmpp-smafeg", "Vmpp-smafei", "Vmpp-smafel", "Vmpp-smafen", "Vmpp-smase", "Vmpp-smm-ea", "Vmpp-smm-ed", "Vmpp-smm-eg", "Vmpp-smm-ei", "Vmpp-smm-el", "Vmpp-smm-en", "Vmpp-smmfea", "Vmpp-smmfed", "Vmpp-smmfeg", "Vmpp-smmfei", "Vmpp-smmfel", "Vmpp-smmfen", "Vmpp-smp-ea", "Vmpp-smp-eg", "Vmpp-smp-ei", "Vmpp-smp-el", "Vmpp-smp-en", "Vmpp-smpfea", "Vmpp-smpfed", "Vmpp-smpfeg", "Vmpp-smpfei", "Vmpp-smpfel", "Vmpp-smpfen", "Vmpp-smpse", "Vmpp-sna-ea", "Vmpp-sna-ed", "Vmpp-sna-eg", "Vmpp-sna-ei", "Vmpp-sna-el", "Vmpp-sna-en", "Vmpp-snafea", "Vmpp-snafed", "Vmpp-snafeg", "Vmpp-snafei", "Vmpp-snafel", "Vmpp-snafen", "Vmpp-snm-ea", "Vmpp-snm-ed", "Vmpp-snm-eg", "Vmpp-snm-ei", "Vmpp-snm-en", "Vmpp-snmfea", "Vmpp-snmfed", "Vmpp-snmfeg", "Vmpp-snmfei", "Vmpp-snmfel", "Vmpp-snmfen", "Vmpp-snp-ea", "Vmpp-snp-ed", "Vmpp-snp-eg", "Vmpp-snp-ei", "Vmpp-snp-en", "Vmpp-snpfea", "Vmpp-snpfed", "Vmpp-snpfeg", "Vmpp-snpfei", "Vmpp-snpfel", "Vmpp-snpfen", "Vmpp-snpse", "Vmps-p-a-ea", "Vmps-p-a-ed", "Vmps-p-a-eg", "Vmps-p-a-ei", "Vmps-p-a-el", "Vmps-p-a-en", "Vmps-p-a-pa", "Vmps-p-a-pd", "Vmps-p-a-pg", "Vmps-p-a-pi", "Vmps-p-a-pl", "Vmps-p-a-pn", "Vmps-p-afea", "Vmps-p-afed", "Vmps-p-afeg", "Vmps-p-afei", "Vmps-p-afel", "Vmps-p-afen", "Vmps-p-afpa", "Vmps-p-afpd", "Vmps-p-afpg", "Vmps-p-afpi", "Vmps-p-afpl", "Vmps-p-afpn", "Vmps-p-m-ea", "Vmps-p-m-eg", "Vmps-p-m-ei", "Vmps-p-m-el", "Vmps-p-m-en", "Vmps-p-m-pa", "Vmps-p-m-pd", "Vmps-p-m-pg", "Vmps-p-m-pi", "Vmps-p-m-pl", "Vmps-p-m-pn", "Vmps-p-mfea", "Vmps-p-mfed", "Vmps-p-mfeg", "Vmps-p-mfei", "Vmps-p-mfel", "Vmps-p-mfen", "Vmps-p-mfpa", "Vmps-p-mfpd", "Vmps-p-mfpg", "Vmps-p-mfpi", "Vmps-p-mfpl", "Vmps-p-mfpn", "Vmps-p-p-ed", "Vmps-p-p-eg", "Vmps-p-p-ei", "Vmps-p-p-en", "Vmps-p-p-pa", "Vmps-p-p-pd", "Vmps-p-p-pg", "Vmps-p-p-pi", "Vmps-p-p-pl", "Vmps-p-p-pn", "Vmps-p-pfea", "Vmps-p-pfed", "Vmps-p-pfeg", "Vmps-p-pfei", "Vmps-p-pfel", "Vmps-p-pfen", "Vmps-p-pfpa", "Vmps-p-pfpd", "Vmps-p-pfpg", "Vmps-p-pfpi", "Vmps-p-pfpl", "Vmps-p-pfpn", "Vmps-p-pse", "Vmps-p-psp", "Vmps-s-pfpa", "Vmps-s-pfpn", "Vmps-sfa-ea", "Vmps-sfa-ed", "Vmps-sfa-eg", "Vmps-sfa-ei", "Vmps-sfa-el", "Vmps-sfa-en", "Vmps-sfa-pa", "Vmps-sfa-pd", "Vmps-sfa-pg", "Vmps-sfa-pi", "Vmps-sfa-pl", "Vmps-sfa-pn", "Vmps-sfafea", "Vmps-sfafed", "Vmps-sfafeg", "Vmps-sfafei", "Vmps-sfafel", "Vmps-sfafen", "Vmps-sfafpa", "Vmps-sfafpd", "Vmps-sfafpg", "Vmps-sfafpi", "Vmps-sfafpl", "Vmps-sfafpn", "Vmps-sfm-ea", "Vmps-sfm-eg", "Vmps-sfm-el", "Vmps-sfm-en", "Vmps-sfm-pa", "Vmps-sfm-pd", "Vmps-sfm-pg", "Vmps-sfm-pi", "Vmps-sfm-pl", "Vmps-sfm-pn", "Vmps-sfmfea", "Vmps-sfmfed", "Vmps-sfmfeg", "Vmps-sfmfei", "Vmps-sfmfel", "Vmps-sfmfen", "Vmps-sfmfpa", "Vmps-sfmfpd", "Vmps-sfmfpg", "Vmps-sfmfpi", "Vmps-sfmfpl", "Vmps-sfmfpn", "Vmps-sfp-ea", "Vmps-sfp-ed", "Vmps-sfp-eg", "Vmps-sfp-ei", "Vmps-sfp-en", "Vmps-sfp-pa", "Vmps-sfp-pd", "Vmps-sfp-pg", "Vmps-sfp-pi", "Vmps-sfp-pl", "Vmps-sfp-pn", "Vmps-sfpfea", "Vmps-sfpfed", "Vmps-sfpfeg", "Vmps-sfpfei", "Vmps-sfpfel", "Vmps-sfpfen", "Vmps-sfpfpa", "Vmps-sfpfpd", "Vmps-sfpfpg", "Vmps-sfpfpi", "Vmps-sfpfpl", "Vmps-sfpfpn", "Vmps-sfpse", "Vmps-sfpsp", "Vmps-sma-ea", "Vmps-sma-ed", "Vmps-sma-eg", "Vmps-sma-ei", "Vmps-sma-el", "Vmps-sma-en", "Vmps-sma-pa", "Vmps-sma-pd", "Vmps-sma-pg", "Vmps-sma-pi", "Vmps-sma-pl", "Vmps-sma-pn", "Vmps-smafea", "Vmps-smafed", "Vmps-smafeg", "Vmps-smafei", "Vmps-smafel", "Vmps-smafen", "Vmps-smafpa", "Vmps-smafpd", "Vmps-smafpg", "Vmps-smafpi", "Vmps-smafpl", "Vmps-smafpn", "Vmps-smm-ea", "Vmps-smm-ed", "Vmps-smm-eg", "Vmps-smm-ei", "Vmps-smm-en", "Vmps-smm-pa", "Vmps-smm-pd", "Vmps-smm-pg", "Vmps-smm-pi", "Vmps-smm-pl", "Vmps-smm-pn", "Vmps-smmfea", "Vmps-smmfeg", "Vmps-smmfei", "Vmps-smmfel", "Vmps-smmfen", "Vmps-smmfpa", "Vmps-smmfpd", "Vmps-smmfpg", "Vmps-smmfpi", "Vmps-smmfpl", "Vmps-smmfpn", "Vmps-smp-ea", "Vmps-smp-eg", "Vmps-smp-ei", "Vmps-smp-en", "Vmps-smp-pa", "Vmps-smp-pd", "Vmps-smp-pg", "Vmps-smp-pi", "Vmps-smp-pl", "Vmps-smp-pn", "Vmps-smpfea", "Vmps-smpfed", "Vmps-smpfeg", "Vmps-smpfei", "Vmps-smpfel", "Vmps-smpfen", "Vmps-smpfpa", "Vmps-smpfpd", "Vmps-smpfpg", "Vmps-smpfpi", "Vmps-smpfpl", "Vmps-smpfpn", "Vmps-smpse", "Vmps-smpsp", "Vmps-sna-ea", "Vmps-sna-eg", "Vmps-sna-ei", "Vmps-sna-el", "Vmps-sna-en", "Vmps-sna-p", "Vmps-sna-pa", "Vmps-sna-pd", "Vmps-sna-pg", "Vmps-sna-pi", "Vmps-sna-pl", "Vmps-sna-pn", "Vmps-snafea", "Vmps-snafed", "Vmps-snafeg", "Vmps-snafei", "Vmps-snafel", "Vmps-snafen", "Vmps-snafpa", "Vmps-snafpd", "Vmps-snafpg", "Vmps-snafpi", "Vmps-snafpl", "Vmps-snafpn", "Vmps-snm-ea", "Vmps-snm-eg", "Vmps-snm-en", "Vmps-snm-pa", "Vmps-snm-pg", "Vmps-snm-pi", "Vmps-snm-pl", "Vmps-snm-pn", "Vmps-snmfea", "Vmps-snmfed", "Vmps-snmfeg", "Vmps-snmfei", "Vmps-snmfel", "Vmps-snmfen", "Vmps-snmfpa", "Vmps-snmfpd", "Vmps-snmfpg", "Vmps-snmfpi", "Vmps-snmfpl", "Vmps-snmfpn", "Vmps-snp-el", "Vmps-snp-p", "Vmps-snp-pa", "Vmps-snp-pd", "Vmps-snp-pg", "Vmps-snp-pi", "Vmps-snp-pl", "Vmps-snp-pn", "Vmps-snpfea", "Vmps-snpfeg", "Vmps-snpfen", "Vmps-snpfpa", "Vmps-snpfpd", "Vmps-snpfpg", "Vmps-snpfpi", "Vmps-snpfpl", "Vmps-snpfpn", "Vmps-snpse", "Vmps-snpsp" }; runTest("ru", "msd", tagset, "Это тест .", new String[] { "это", "тест", "." }, new String[] { "P--nsnn", "Ncmsnn", "SENT" }, new String[] { "PRON", "NOUN", "PUNCT" }); } @Test @Ignore("Slovene model currently not in Artifactory because we do not know tagset yet") public void testSlovene() throws Exception { String[] tagset = { }; runTest("sl", null, tagset, "To je test .", new String[] { "ta", "biti", "test", "." }, new String[] { "zk-sei----s", "gvpste--n", "somei", "SENT" }, new String[] { "POS", "POS", "POS", "POS" }); runTest("sl", null, tagset, "Gremo na Češko za kosilo .", new String[] { "iti", "na", "Češko", "za", "kosilo", "." }, new String[] { "gppspm--n-----d", "dpet", "slmei", "dpet", "soset", "SENT" }, new String[] { "POS", "POS", "POS", "POS", "POS", "POS" }); } @Test public void testSlovak() throws Exception { String[] tagset = { "!", "\"", "#", "%", "(", ")", ",", ".", "0", ":", ";", "?", "Apx", "Apy", "Apz", "Asx", "Asy", "Asz", "Dx", "Dy", "Dz", "E", "Gpx", "Gpy", "Gpz", "Gsx", "Gsy", "Gsz", "J", "ND", "Np", "Ns", "O", "OY", "PD", "Pp", "Ps", "Q", "R", "Sp", "Ss", "T", "TY", "VBpa", "VBpb", "VBpc", "VBsa", "VBsb", "VBsc", "VH", "VI", "VKpa", "VKpb", "VKpc", "VKsa", "VKsb", "VKsc", "VLpa", "VLpb", "VLpc", "VLsa", "VLsb", "VLsc", "VMpa", "VMpb", "VMsb", "W", "Y", "Z", "par" }; runTest("sk", "smt-reduced", tagset, "To je test .", new String[] { "to", "byť", "test", "." }, new String[] { "Ps", "VKsc", "Ss", "." }, new String[] { "PRON", "VERB", "NOUN", "PUNCT" }); } @Test public void testChinese() throws Exception { String[] tagset = { "a", "ad", "ag", "an", "b", "bg", "c", "d", "dg", "e", "ew", "f", "g", "h", "i", "j", "k", "l", "m", "mg", "n", "nd", "ng", "nh", "ni", "nl", "nr", "ns", "nt", "nx", "nz", "o", "p", "q", "r", "rg", "s", "t", "tg", "u", "v", "vd", "vg", "vn", "w", "wp", "ws", "x", "y", "z" }; // The rudder often in the wake of the wind round the back of the area. runTest("zh", "lcmc", tagset, "尾 舵 常 处于 风轮 后面 的 尾流 区里 。", new String[] { "_", "_", "_", "_", "风轮", "_", "_", "_", "_", "_" }, new String[] { "ng", "n", "d", "v", "n", "f", "u", "n", "nl", "ew" }, new String[] { "X", "NOUN", "ADV", "VERB", "NOUN", "X", "X", "NOUN", "X", "PUNCT" } ); // The service sector has become an important engine of Guangdong's economic transformation // and upgrading. runTest("zh", "lcmc", tagset, "服务业 成为 广东 经济 转型 升级 的 重要 引擎 。", new String[] { "_", "_", "_", "_", "_", "_", "_", "_", "_", "_" }, new String[] { "n", "v", "ns", "n", "v", "v", "u", "a", "n", "ew" }, new String[] { "NOUN", "VERB", "X", "NOUN", "VERB", "VERB", "X", "X", "NOUN", "PUNCT" } ); // How far is China from the world brand? runTest("zh", "lcmc", tagset, "中国 离 世界 技术 品牌 有 多远 ?", new String[] { "_", "_", "_", "_", "_", "_", "多远", "_" }, new String[] { "ns", "v", "n", "n", "n", "v", "n", "ew" }, new String[] { "X", "VERB", "NOUN", "NOUN", "NOUN", "VERB", "NOUN", "PUNCT" } ); } @Test // @Ignore("Platform specific") public void testOddCharacters() throws Exception { runTest("en", null, null, "² § ¶ § °", new String[] { "²", "§", "¶", "§", "°" }, new String[] { "NN", "SYM", "NN", "SYM", "SYM" }, new String[] { "NOUN", "PUNCT", "NOUN", "PUNCT", "PUNCT" }); } /** * Generate a very large document and test it. */ @Test @Ignore("Ignoring test to avoid memory errors (see issue #850 in GitHub") public void hugeDocumentTest() throws Exception { // Start Java with -Xmx512m boolean run = Runtime.getRuntime().maxMemory() > (500000000); if (!run) { System.out.println("Test requires more heap than available, skipping"); } Assume.assumeTrue(run); // Disable trace as this significantly slows down the test TreeTaggerWrapper.TRACE = false; String text = "This is a test ."; int reps = 4000000 / text.length(); String testString = repeat(text, " ", reps); JCas jcas = runTest("en", null, null, testString, null, null, null); List<POS> actualTags = new ArrayList<POS>(select(jcas, POS.class)); assertEquals(reps * 5, actualTags.size()); // test POS annotations String[] expectedTags = new String[] { "DT", "VBZ", "DT", "NN", "SENT" }; String[] expectedTagClasses = new String[] { "ART", "V", "ART", "NN", "PUNC" }; for (int i = 0; i < actualTags.size(); i++) { POS posAnnotation = actualTags.get(i); assertEquals("In position "+i, expectedTagClasses[i%5], posAnnotation.getType().getShortName()); assertEquals("In position "+i, expectedTags[i%5], posAnnotation.getPosValue()); } System.out.println("Successfully tagged document with " + testString.length() + " characters"); } /** * Test using the same AnalysisEngine multiple times. */ @Test public void multiDocumentTest() throws Exception { checkModelsAndBinary("en"); String testDocument = "This is a test ."; String[] lemmas = { "this", "be", "a", "test", "." }; String[] tags = { "DT", "VBZ", "DT", "NN", "SENT" }; String[] tagClasses = { "DET", "VERB", "DET", "NOUN", "PUNCT" }; AnalysisEngine engine = createEngine(TreeTaggerPosTagger.class); HideOutput hideOut = new HideOutput(); try { for (int n = 0; n < 100; n++) { JCas aJCas = TestRunner.runTest(engine, "en", testDocument); AssertAnnotations.assertPOS(tagClasses, tags, select(aJCas, POS.class)); AssertAnnotations.assertLemma(lemmas, select(aJCas, Lemma.class)); } } finally { engine.destroy(); hideOut.restoreOutput(); } } /** * Run the {@link #hugeDocumentTest()} 100 times. */ @Test @Ignore("This test takes a very long time. Only include it if you need to "+ "test the stability of the annotator") public void loadTest() throws Exception { for (int i = 0; i < 100; i++) { System.out.println("Load test iteration " + i); hugeDocumentTest(); } } private void checkModelsAndBinary(String lang) { Assume.assumeTrue(getClass().getResource( "/de/tudarmstadt/ukp/dkpro/core/treetagger/lib/tagger-" + lang + "-le.bin") != null); Assume.assumeTrue(getClass().getResource( "/de/tudarmstadt/ukp/dkpro/core/treetagger/bin/LICENSE.txt") != null); } private JCas runTest(String language, String tagsetName, String[] tagset, String testDocument, String[] lemmas, String[] tags, String[] tagClasses) throws Exception { checkModelsAndBinary(language); AnalysisEngine engine = createEngine(TreeTaggerPosTagger.class, TreeTaggerPosTagger.PARAM_PRINT_TAGSET, true); JCas aJCas = TestRunner.runTest(engine, language, testDocument); AssertAnnotations.assertLemma(lemmas, select(aJCas, Lemma.class)); AssertAnnotations.assertPOS(tagClasses, tags, select(aJCas, POS.class)); if (tagset != null) { AssertAnnotations.assertTagset(POS.class, tagsetName, tagset, aJCas); } return aJCas; } /** * Test using the same AnalysisEngine multiple times. */ @Test public void longTokenTest() throws Exception { checkModelsAndBinary("en"); AnalysisEngine engine = createEngine(TreeTaggerPosTagger.class); JCas jcas = engine.newJCas(); try { for (int n = 99990; n < 100000; n ++) { System.out.println(n); jcas.setDocumentLanguage("en"); JCasBuilder builder = new JCasBuilder(jcas); builder.add("Start", Token.class); builder.add("with", Token.class); builder.add("good", Token.class); builder.add("tokens", Token.class); builder.add(".", Token.class); builder.add(StringUtils.repeat("b", n), Token.class); builder.add("End", Token.class); builder.add("with", Token.class); builder.add("some", Token.class); builder.add("good", Token.class); builder.add("tokens", Token.class); builder.add(".", Token.class); builder.close(); engine.process(jcas); jcas.reset(); } } finally { engine.destroy(); } } /** * Runs a small pipeline on a text containing quite odd characters such as * Unicode LEFT-TO-RIGHT-MARKs. The BreakIteratorSegmenter creates tokens from these * which are send to TreeTagger as tokens containing line breaks or only * whitespace. TreeTaggerPosLemmaTT4J has to filter these tokens before * they reach the TreeTaggerWrapper. */ // @Test // public // void testStrangeDocument() // throws Exception // { // CollectionReader reader = createReader( // FileSystemReader.class, // createTypeSystemDescription(), // FileSystemReader.PARAM_INPUTDIR, getTestResource( // "test_files/annotator/TreeTaggerPosLemmaTT4J/strange")); // // AnalysisEngine sentenceSplitter = createEngine( // BreakIteratorSegmenter.class, // tsd); // // AnalysisEngine tt = createEngine(TreeTaggerPosLemmaTT4J.class, tsd, // TreeTaggerTT4JBase.PARAM_LANGUAGE_CODE, "en"); // // runPipeline(reader, sentenceSplitter, tt); // } // @Test // @Ignore("This test should fail, however - due to fixes in the Tokenizer, " + // "we can currently not provokate a failure with the given 'strange' " + // "document.") // public // void testStrangeDocumentFail() // throws Exception // { // CollectionReader reader = createReader( // FileSystemReader.class, // createTypeSystemDescription(), // FileSystemReader.PARAM_INPUTDIR, getTestResource( // "test_files/annotator/TreeTaggerPosLemmaTT4J/strange")); // // AnalysisEngine sentenceSplitter = createEngine( // BreakIteratorSegmenter.class, // tsd); // // AnalysisEngine tt = createEngine(TreeTaggerPosLemmaTT4J.class, tsd, // TreeTaggerTT4JBase.PARAM_LANGUAGE_CODE, "en", // TreeTaggerTT4JBase.PARAM_PERFORMANCE_MODE, true); // // runPipeline( // reader, // sentenceSplitter, // tt); // } /** * When running this test, check manually if TreeTagger is restarted * between the documents. If you jank up the log levels, that should be * visible on the console. Unfortunately we cannot easily access the * restartCount of the TreeTaggerWrapper. */ // @Test // public // void testRealMultiDocument() // throws Exception // { // CollectionReader reader = createReader( // FileSystemReader.class, // createTypeSystemDescription(), // FileSystemReader.PARAM_INPUTDIR, getTestResource( // "test_files/annotator/TreeTaggerPosLemmaTT4J/multiDoc")); // // AnalysisEngine sentenceSplitter = createEngine( // BreakIteratorSegmenter.class, // tsd); // // AnalysisEngine tt = createEngine(TreeTaggerPosLemmaTT4J.class, tsd, // TreeTaggerTT4JBase.PARAM_LANGUAGE_CODE, "en"); // // runPipeline( // reader, // sentenceSplitter, // tt); // } /* * Uncomment to test explicitly setting model/binary locations */ // @Test // public void testExplicitBinaryModel() throws Exception // { // AnalysisEngine tt = createEngine(TreeTaggerPosTagger.class, // TreeTaggerPosTagger.PARAM_EXECUTABLE_PATH, // "/Applications/tree-tagger-MacOSX-3.2-intel/bin/tree-tagger", // TreeTaggerPosTagger.PARAM_MODEL_LOCATION, // "/Applications/tree-tagger-MacOSX-3.2-intel/models/german-par-linux-3.2-utf8.bin", // TreeTaggerPosTagger.PARAM_MODEL_ENCODING, "UTF-8"); // // JCas jcas = JCasFactory.createJCas(); // jcas.setDocumentLanguage("de"); // // TokenBuilder<Token, Sentence> tb = new TokenBuilder<Token, Sentence>(Token.class, // Sentence.class); // tb.buildTokens(jcas, "Dies ist ein test ."); // // tt.process(jcas); // } @Rule public DkproTestContext testContext = new DkproTestContext(); }