/*
* Copyright 2012
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.maltparser;
import static de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations.*;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.junit.Assume;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import de.tudarmstadt.ukp.dkpro.core.hunpos.HunPosTagger;
import de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner;
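/**
* Tests for {@link MaltParser}: each test checks the dependencies and tagset descriptions obtained
* for one sample sentence in a given language.
*/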
public class MaltParserTest
{
// /**
// * This test really only checks the tagsets and if any dependencies are created. Since we
// * currently do not have a POS tagger for Bengali, the dependencies are just bogus.
// */
// @Test
// public void testBengali()
// throws Exception
// {
// JCas jcas = runTest(
// "dummy-bn",
// "linear",
// "আমরা যতটা সম্ভব উপাদানসমূহের ও নির্ভরতা রয়েছে যা একটি খুব জটিল উদাহরণ বাক্য, প্রয়োজন .");
//
// String[] dependencies = {
// "[ 0, 4]Dependency(pof) D[0,4](আমরা) G[5,9](যতটা)",
// "[ 10, 15]Dependency(r6) D[10,15](সম্ভব) G[5,9](যতটা)",
// "[ 16, 28]Dependency(r6) D[16,28](উপাদানসমূহের) G[10,15](সম্ভব)",
// "[ 29, 30]Dependency(r6) D[29,30](ও) G[16,28](উপাদানসমূহের)",
// "[ 31, 39]Dependency(r6) D[31,39](নির্ভরতা) G[29,30](ও)",
// "[ 40, 46]Dependency(r6) D[40,46](রয়েছে) G[31,39](নির্ভরতা)",
// "[ 47, 49]Dependency(r6) D[47,49](যা) G[40,46](রয়েছে)",
// "[ 50, 54]Dependency(r6) D[50,54](একটি) G[47,49](যা)",
// "[ 55, 58]Dependency(r6) D[55,58](খুব) G[50,54](একটি)",
// "[ 59, 63]Dependency(r6) D[59,63](জটিল) G[55,58](খুব)",
// "[ 64, 70]Dependency(r6) D[64,70](উদাহরণ) G[59,63](জটিল)",
// "[ 71, 77]Dependency(r6) D[71,77](বাক্য,) G[64,70](উদাহরণ)",
// "[ 78, 86]Dependency(r6) D[78,86](প্রয়োজন) G[71,77](বাক্য,)",
// "[ 87, 88]Dependency(pof) D[87,88](.) G[78,86](প্রয়োজন)" };
//
// String[] posTags = { "CC", "DEM", "ECH", "INJ", "INTF", "JJ", "JJ:?", "NEG",
// "NN", "NNP", "NRP", "NST", "NULL", "NULL__VGF", "PRP", "PSP", "QC", "QF", "QO",
// "RB", "RDP", "RP", "SYM", "SYM:?", "UT", "VAUX", "VM", "WQ", "XC" };
//
// String[] depTags = { "CCP", "CCP2", "CCP3", "CCP4", "CCP6", "NP", "NP2",
// "NP3", "NP4", "NP5", "NULL__CCP", "NULL__CCP2", "NULL__VGF", "NULL__VGF2", "RBP",
// "ROOT", "VGF", "VGF2", "VGF3", "VGF4", "VGINF", "VGNF", "VGNN", "VM", "cccof",
// "ccof", "ccop", "fragof", "jjmod", "jjmod__relc", "jk1", "jmod", "k*u", "k1",
// "k1S", "k1s", "k2", "k2-ras", "k2g", "k2p", "k2s", "k3", "k4", "k5", "k7", "k7p",
// "k7t", "nmod", "nmod-relc", "nmod__relc", "nmod_relc", "nmod_relc", "pk1", "pof",
// "r6", "rad", "ras", "ras-k2", "rbmod__relc", "rd", "rh", "rs", "rt", "sent_adv",
// "vmod" };
//
// String[] unmappedPos = { "ECH", "JJ:?", "NRP", "NST", "NULL", "NULL__VGF",
// "QO", "SYM:?", "UT", "XC" };
//
// assertDependencies(dependencies, JCasUtil.select(jcas, Dependency.class));
// assertTagset(POS.class, "utpal", posTags, jcas);
// assertTagsetMapping(POS.class, "utpal", unmappedPos, jcas);
// assertTagset(Dependency.class, "utpal", depTags, jcas);
// // FIXME assertTagsetMapping(Dependency.class, "ftb", new String[] {},
// jcas);
// }
/**
 * Runs the {@code dummy-ca} / {@code linear} configuration on a Catalan sentence and compares the
 * resulting dependencies and the POS/dependency tagsets with fixed expectations. The test is
 * currently disabled via {@link Ignore}.
 * <p>
 * NOTE(review): the expected dependencies contain the Polish tokens used in {@code testPolish()}
 * and the tagset names ({@code nkjp}, {@code pdp}) are the ones asserted there, so the
 * expectations look like copied placeholders rather than Catalan reference data - confirm and
 * replace them before enabling this test.
 *
 * @throws Exception
 *             if an error occurs.
 */
@Ignore("The parser model 'catmalt.mco' is created by MaltParser 1.4.1.")
@Test
public void testCatalan()
    throws Exception
{
    // Expected tag inventories.
    String[] expectedPosTags = { "adj", "adja", "adjc", "adjp", "adv", "aglt", "bedzie",
            "brev", "comp", "conj", "depr", "fin", "ger", "imps", "impt", "inf", "interp",
            "num", "pact", "pant", "pcon", "ppas", "ppron12", "ppron3", "praet", "pred",
            "prep", "qub", "siebie", "subst", "winien" };

    String[] expectedDepTags = { "ROOT", "abbrev_punct", "adj", "adjunct", "aglt", "app",
            "aux", "comp", "comp_fin", "comp_inf", "complm", "cond", "conjunct", "conjunt",
            "coord", "coord_punct", "imp", "mwe", "ne", "neg", "obj", "obj_th", "pd",
            "pre_coord", "pred", "punct", "refl", "subj" };

    // Expected dependency relations, one rendered annotation per entry.
    String[] expectedDependencies = {
            "[ 7, 13]Dependency(adjunct) D[7,13](bardzo) G[28,36](przykład)",
            "[ 14, 27]Dependency(mwe) D[14,27](skomplikowany) G[7,13](bardzo)",
            "[ 28, 36]Dependency(pred) D[28,36](przykład) G[0,6](Musimy)",
            "[ 37, 43]Dependency(mwe) D[37,43](zdanie) G[28,36](przykład)",
            "[ 44, 45]Dependency(punct) D[44,45](,) G[28,36](przykład)",
            "[ 46, 51]Dependency(pred) D[46,51](które) G[44,45](,)",
            "[ 52, 59]Dependency(pred) D[52,59](zawiera) G[46,51](które)",
            "[ 60, 63]Dependency(mwe) D[60,63](tak) G[52,59](zawiera)",
            "[ 64, 69]Dependency(punct) D[64,69](wiele) G[52,59](zawiera)",
            "[ 70, 80]Dependency(pred) D[70,80](składników) G[64,69](wiele)",
            "[ 81, 82]Dependency(mwe) D[81,82](i) G[70,80](składników)",
            "[ 83, 93]Dependency(punct) D[83,93](zależności) G[70,80](składników)",
            "[ 94, 95]Dependency(punct) D[94,95](,) G[100,102](to)",
            "[ 96, 99]Dependency(mwe) D[96,99](jak) G[100,102](to)",
            "[100,102]Dependency(comp_fin) D[100,102](to) G[83,93](zależności)",
            "[103,110]Dependency(mwe) D[103,110](możliwe) G[100,102](to)",
            "[111,112]Dependency(punct) D[111,112](.) G[100,102](to)" };

    String text = "Necessitem una oració d'exemple molt complicat "
            + ", que conté la major quantitat de components i dependències com sigui possible .";
    JCas cas = runTest("dummy-ca", "linear", text);

    assertDependencies(expectedDependencies, JCasUtil.select(cas, Dependency.class));

    assertTagset(MaltParser.class, POS.class, "nkjp", expectedPosTags, cas);
    // FIXME the POS tagset mapping is not asserted yet:
    // assertTagsetMapping(POS.class, "freeling", new String[] {}, jcas);

    assertTagset(MaltParser.class, Dependency.class, "pdp", expectedDepTags, cas);
    // FIXME the dependency tagset mapping is not asserted yet:
    // assertTagsetMapping(Dependency.class, "iula", new String[] {}, jcas);
}
/**
 * Checks the English output when no model variant is requested ({@code null} is passed to
 * {@code runTest}).
 *
 * @throws Exception
 *             if an error occurs.
 */
@Test
public void testEnglishDefault()
    throws Exception
{
    assertEnglishParse(null);
}

/**
 * Checks the English output for the {@code linear} model variant.
 *
 * @throws Exception
 *             if an error occurs.
 */
@Test
public void testEnglishLinear()
    throws Exception
{
    assertEnglishParse("linear");
}

/**
 * Checks the English output for the {@code poly} model variant.
 *
 * @throws Exception
 *             if an error occurs.
 */
@Test
public void testEnglishPoly()
    throws Exception
{
    assertEnglishParse("poly");
}

/**
 * Shared body of the English tests: runs the {@code en} configuration with the given variant on
 * the English sample sentence and asserts the dependencies, the POS tagset ({@code ptb}) together
 * with its unmapped tags, and the dependency tagset ({@code stanford}). The expectations are the
 * same for every variant exercised by the tests above.
 *
 * @param aVariant
 *            the model variant handed to {@code runTest} unchanged; may be {@code null}.
 * @throws Exception
 *             if an error occurs.
 */
private void assertEnglishParse(String aVariant)
    throws Exception
{
    JCas jcas = runTest("en", aVariant, "We need a very complicated example sentence , which "
            + "contains as many constituents and dependencies as possible .");

    String[] dependencies = {
            "[ 0, 2]Dependency(nsubj,basic) D[0,2](We) G[3,7](need)",
            "[ 3, 7]ROOT(ROOT,basic) D[3,7](need) G[3,7](need)",
            "[ 8, 9]Dependency(det,basic) D[8,9](a) G[35,43](sentence)",
            "[ 10, 14]Dependency(advmod,basic) D[10,14](very) G[15,26](complicated)",
            "[ 15, 26]Dependency(amod,basic) D[15,26](complicated) G[35,43](sentence)",
            "[ 27, 34]Dependency(nn,basic) D[27,34](example) G[35,43](sentence)",
            "[ 35, 43]Dependency(dobj,basic) D[35,43](sentence) G[3,7](need)",
            "[ 44, 45]Dependency(punct,basic) D[44,45](,) G[35,43](sentence)",
            "[ 46, 51]Dependency(nsubj,basic) D[46,51](which) G[52,60](contains)",
            "[ 52, 60]Dependency(rcmod,basic) D[52,60](contains) G[35,43](sentence)",
            "[ 61, 63]Dependency(prep,basic) D[61,63](as) G[52,60](contains)",
            "[ 64, 68]Dependency(amod,basic) D[64,68](many) G[69,81](constituents)",
            "[ 69, 81]Dependency(pobj,basic) D[69,81](constituents) G[61,63](as)",
            "[ 82, 85]Dependency(cc,basic) D[82,85](and) G[69,81](constituents)",
            "[ 86, 98]Dependency(conj,basic) D[86,98](dependencies) G[69,81](constituents)",
            "[ 99,101]Dependency(prep,basic) D[99,101](as) G[69,81](constituents)",
            "[102,110]Dependency(pobj,basic) D[102,110](possible) G[99,101](as)",
            "[111,112]Dependency(punct,basic) D[111,112](.) G[3,7](need)" };

    String[] posTags = { "#", "$", "''", "(", ")", ",", ".", ":", "CC", "CD",
            "DT", "EX", "FW", "IN", "JJ", "JJR", "JJS", "LS", "MD", "NN", "NNP", "NNPS", "NNS",
            "PDT", "POS", "PRP", "PRP$", "PRT", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH",
            "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB", "``" };

    String[] depTags = { "ROOT", "abbrev", "acomp", "advcl", "advmod", "amod",
            "appos", "attr", "aux", "auxpass", "cc", "ccomp", "complm", "conj", "cop", "csubj",
            "csubjpass", "dep", "det", "dobj", "expl", "infmod", "iobj", "mark", "measure",
            "neg", "nn", "nsubj", "nsubjpass", "null", "num", "number", "parataxis", "partmod",
            "pcomp", "pobj", "poss", "possessive", "preconj", "pred", "predet", "prep", "prt",
            "punct", "purpcl", "quantmod", "rcmod", "rel", "tmod", "xcomp" };

    String[] unmappedPos = { "PRT" };

    assertDependencies(dependencies, JCasUtil.select(jcas, Dependency.class));

    // There are some minor differences between the tags produced by the POS tagger and the
    // tags expected by the parser model, so the tagset is only asserted for MaltParser and not
    // for the tagger. We need a better test here that makes these differences more visible and
    // at the same time doesn't fail.
    assertTagset(MaltParser.class, POS.class, "ptb", posTags, jcas);
    assertTagsetMapping(MaltParser.class, POS.class, "ptb", unmappedPos, jcas);
    assertTagset(MaltParser.class, Dependency.class, "stanford", depTags, jcas);
    // FIXME the dependency tagset mapping is not asserted yet:
    // assertTagsetMapping(Dependency.class, "stanford", new String[] {}, jcas);
}
/**
 * Checks the dependencies and the POS/dependency tagsets obtained for a Polish sentence with the
 * {@code dummy-pl} / {@code linear} configuration. Note that there actually is no POS tagger for
 * Polish.
 *
 * @throws Exception
 *             if an error occurs.
 */
@Test
public void testPolish()
    throws Exception
{
    // Expected tag inventories.
    String[] expectedPosTags = { "adj", "adja", "adjc", "adjp", "adv", "aglt", "bedzie",
            "brev", "comp", "conj", "depr", "fin", "ger", "imps", "impt", "inf", "interp",
            "num", "pact", "pant", "pcon", "ppas", "ppron12", "ppron3", "praet", "pred",
            "prep", "qub", "siebie", "subst", "winien" };

    String[] expectedDepTags = { "ROOT", "abbrev_punct", "adj", "adjunct", "aglt", "app",
            "aux", "comp", "comp_fin", "comp_inf", "complm", "cond", "conjunct", "conjunt",
            "coord", "coord_punct", "imp", "mwe", "ne", "neg", "obj", "obj_th", "pd",
            "pre_coord", "pred", "punct", "refl", "subj" };

    // Expected dependency relations, one rendered annotation per entry.
    String[] expectedDependencies = {
            "[ 0, 6]ROOT(ROOT,basic) D[0,6](Musimy) G[0,6](Musimy)",
            "[ 7, 13]Dependency(adjunct,basic) D[7,13](bardzo) G[28,36](przykład)",
            "[ 14, 27]Dependency(mwe,basic) D[14,27](skomplikowany) G[7,13](bardzo)",
            "[ 28, 36]Dependency(pred,basic) D[28,36](przykład) G[0,6](Musimy)",
            "[ 37, 43]Dependency(mwe,basic) D[37,43](zdanie) G[28,36](przykład)",
            "[ 44, 45]Dependency(punct,basic) D[44,45](,) G[28,36](przykład)",
            "[ 46, 51]Dependency(pred,basic) D[46,51](które) G[44,45](,)",
            "[ 52, 59]Dependency(pred,basic) D[52,59](zawiera) G[46,51](które)",
            "[ 60, 63]Dependency(mwe,basic) D[60,63](tak) G[52,59](zawiera)",
            "[ 64, 69]Dependency(punct,basic) D[64,69](wiele) G[52,59](zawiera)",
            "[ 70, 80]Dependency(pred,basic) D[70,80](składników) G[64,69](wiele)",
            "[ 81, 82]Dependency(mwe,basic) D[81,82](i) G[70,80](składników)",
            "[ 83, 93]Dependency(punct,basic) D[83,93](zależności) G[70,80](składników)",
            "[ 94, 95]Dependency(punct,basic) D[94,95](,) G[100,102](to)",
            "[ 96, 99]Dependency(mwe,basic) D[96,99](jak) G[100,102](to)",
            "[100,102]Dependency(comp_fin,basic) D[100,102](to) G[83,93](zależności)",
            "[103,110]Dependency(mwe,basic) D[103,110](możliwe) G[100,102](to)",
            "[111,112]Dependency(punct,basic) D[111,112](.) G[100,102](to)" };

    String text = "Musimy bardzo skomplikowany przykład zdanie , które "
            + "zawiera tak wiele składników i zależności , jak to możliwe .";
    JCas cas = runTest("dummy-pl", "linear", text);

    assertDependencies(expectedDependencies, JCasUtil.select(cas, Dependency.class));

    assertTagset(MaltParser.class, POS.class, "nkjp", expectedPosTags, cas);
    // FIXME the POS tagset mapping is not asserted yet:
    // assertTagsetMapping(POS.class, "freeling", new String[] {}, jcas);

    assertTagset(MaltParser.class, Dependency.class, "pdp", expectedDepTags, cas);
    // FIXME the dependency tagset mapping is not asserted yet:
    // assertTagsetMapping(Dependency.class, "iula", new String[] {}, jcas);
}
/**
 * Checks the dependencies and the POS/dependency tagsets obtained for a Portuguese sentence with
 * the {@code pt} / {@code linear} configuration. The test is currently disabled via
 * {@link Ignore} because the model is not integrated.
 *
 * @throws Exception
 *             if an error occurs.
 */
@Ignore("Model not integrated")
@Test
public void testPortuguese()
    throws Exception
{
    // Expected tag inventories.
    String[] expectedPosTags = { "?", "adj", "adv", "art", "conj-c", "conj-s", "ec", "in",
            "n", "num", "pp", "pron-det", "pron-indp", "pron-pers", "prop", "prp", "punc",
            "v-fin", "v-ger", "v-inf", "v-pcp", "vp" };

    String[] expectedDepTags = { ">A", ">N", ">P", ">S", "?", "A<", "A<PRED", "ACC",
            "ACC-PASS", "ACC>-PASS", "ADVL", "ADVO", "ADVS", "APP", "AS<", "AUX", "AUX<",
            "CJT", "CJT&ADVL", "CJT&PRED", "CMD", "CO", "COM", "DAT", "EXC", "FOC", "H",
            "KOMP<", "MV", "N<", "N<PRED", "NUM<", "OC", "P", "P<", "PASS", "PCJT", "PIV",
            "PMV", "PRD", "PRED", "PRT-AUX", "PRT-AUX<", "PRT-AUX>", "PUNC", "QUE", "S<", "SC",
            "STA", "SUB", "SUBJ", "TOP", "UTT", "VOC", "VOK" };

    // Expected dependency relations, one rendered annotation per entry.
    String[] expectedDependencies = {
            "[ 11, 13]Dependency(N<) D[11,13](de) G[0,10](Precisamos)",
            "[ 14, 17]Dependency(>N) D[14,17](uma) G[18,23](frase)",
            "[ 18, 23]Dependency(P<) D[18,23](frase) G[11,13](de)",
            "[ 24, 31]Dependency(A<) D[24,31](exemplo) G[18,23](frase)",
            "[ 32, 37]Dependency(ADVL) D[32,37](muito) G[38,48](complicado)",
            "[ 38, 48]Dependency(N<) D[38,48](complicado) G[24,31](exemplo)",
            "[ 49, 50]Dependency(PUNC) D[49,50](,) G[18,23](frase)",
            "[ 51, 54]Dependency(SUBJ) D[51,54](que) G[55,61](contém)",
            "[ 55, 61]Dependency(N<PRED) D[55,61](contém) G[18,23](frase)",
            "[ 62, 63]Dependency(>N) D[62,63](o) G[70,76](número)",
            "[ 64, 69]Dependency(>N) D[64,69](maior) G[70,76](número)",
            "[ 70, 76]Dependency(ACC) D[70,76](número) G[55,61](contém)",
            "[ 77, 79]Dependency(N<) D[77,79](de) G[70,76](número)",
            "[ 80, 89]Dependency(P<) D[80,89](eleitores) G[77,79](de)",
            "[ 90, 91]Dependency(CO) D[90,91](e) G[80,89](eleitores)",
            "[ 92,104]Dependency(CJT) D[92,104](dependências) G[80,89](eleitores)",
            "[105,113]Dependency(N<) D[105,113](possível) G[92,104](dependências)",
            "[114,115]Dependency(PUNC) D[114,115](.) G[0,10](Precisamos)" };

    String text = "Precisamos de uma frase exemplo muito complicado , que "
            + "contém o maior número de eleitores e dependências possível .";
    JCas cas = runTest("pt", "linear", text);

    assertDependencies(expectedDependencies, JCasUtil.select(cas, Dependency.class));

    assertTagset(MaltParser.class, POS.class, "bosque", expectedPosTags, cas);
    // FIXME the POS tagset mapping is not asserted yet:
    // assertTagsetMapping(POS.class, "freeling", new String[] {}, jcas);

    assertTagset(MaltParser.class, Dependency.class, "unknown", expectedDepTags, cas);
    // FIXME the dependency tagset mapping is not asserted yet:
    // assertTagsetMapping(Dependency.class, "iula", new String[] {}, jcas);
}
/**
 * Tagset-oriented test for Spanish ({@code dummy-es} / {@code linear}). It verifies the tagsets
 * and that dependencies are created at all. The POS tags expected by the Spanish model do
 * <b>not</b> correspond to those produced by the POS tagger running here, so the asserted
 * dependencies themselves are bogus.
 *
 * @throws Exception
 *             if an error occurs.
 */
@Test
public void testSpanishLinear()
    throws Exception
{
    // Expected tag inventories.
    String[] expectedPosTags = { "AO0FP0", "AO0FS0", "AO0MP0", "AO0MS0", "AQ0CN0",
            "AQ0CP0", "AQ0CS0", "AQ0FP0", "AQ0FS0", "AQ0FSP", "AQ0MP0", "AQ0MPP", "AQ0MS0",
            "AQ0MSP", "AQDFS0", "AQDMP0", "AQDMS0", "AQSFP0", "AQSFS0", "AQSMP0", "AQSMS0",
            "CC", "CS", "DA0FP0", "DA0FS0", "DA0MP0", "DA0MS0", "DA0NS0", "DD0CP0", "DD0CS0",
            "DD0FP0", "DD0FS0", "DD0MP0", "DD0MS0", "DI0CP0", "DI0CS0", "DI0FP0", "DI0FS0",
            "DI0MP0", "DI0MS0", "DP1CPS", "DP1CSS", "DP1FPP", "DP1FSP", "DP1MPP", "DP1MSP",
            "DP2CSS", "DP3CP0", "DP3CS0", "DT0CN0", "DT0FP0", "Fat", "Fc", "Fd", "Fe", "Fia",
            "Fit", "Fp", "Fpa", "Fpt", "Fra", "Frc", "Fs", "Fx", "I", "NC00000", "NCCN000",
            "NCCP000", "NCCS000", "NCFN000", "NCFP000", "NCFP00A", "NCFP00D", "NCFS000",
            "NCFS00A", "NCFS00X", "NCMN000", "NCMP000", "NCMP00A", "NCMP00D", "NCMS000",
            "NCMS00A", "NCMS00D", "NP00000", "P03CN000", "PD0CP000", "PD0CS000", "PD0FP000",
            "PD0FS000", "PD0MP000", "PD0MS000", "PD0NS000", "PI0CC000", "PI0CP000", "PI0CS000",
            "PI0FP000", "PI0FS000", "PI0MP000", "PI0MS000", "PP1CP000", "PP1CS000", "PP1CSN00",
            "PP1CSO00", "PP1MP000", "PP2CP00P", "PP2CS000", "PP2CS00P", "PP3CN000", "PP3CNO00",
            "PP3CPD00", "PP3CSD00", "PP3FP000", "PP3FPA00", "PP3FS000", "PP3FSA00", "PP3MP000",
            "PP3MPA00", "PP3MS000", "PP3MSA00", "PP3NS000", "PR000000", "PR0CN000", "PR0CP000",
            "PR0CS000", "PR0FP000", "PR0FS000", "PR0MP000", "PR0MS000", "PT000000", "PT0CN000",
            "PT0CP000", "PT0CS000", "PT0MP000", "PT0MS000", "PX1FP0P0", "PX1FS0P0", "PX1MS0P0",
            "PX3MS0C0", "PX3NS0C0", "RG", "RN", "SPS00", "VAG0000", "VAIC1P0", "VAIC3P0",
            "VAIC4S0", "VAIF1P0", "VAIF3P0", "VAIF3S0", "VAII1P0", "VAII3P0", "VAII4S0",
            "VAIP1P0", "VAIP1S0", "VAIP3P0", "VAIP3S0", "VAIS3P0", "VAIS3S0", "VAN0000",
            "VAP00SM", "VASF3P0", "VASF4S0", "VASI1P0", "VASI3P0", "VASI4S0", "VASP1P0",
            "VASP3P0", "VASP4S0", "VMG0000", "VMIB1P0", "VMIC1P0", "VMIC3P0", "VMIC3S0",
            "VMIC4S0", "VMIF1P0", "VMIF1S0", "VMIF3P0", "VMIF3S0", "VMII1P0", "VMII2S0",
            "VMII3P0", "VMII3S0", "VMII4S0", "VMIP1P0", "VMIP1S0", "VMIP2S0", "VMIP3P0",
            "VMIP3S0", "VMIS1P0", "VMIS1S0", "VMIS3P0", "VMIS3S0", "VMM01P0", "VMM02S0",
            "VMM03P0", "VMM03S0", "VMN0000", "VMP00PF", "VMP00PM", "VMP00SF", "VMP00SM",
            "VMSF2S0", "VMSF3P0", "VMSF3S0", "VMSF4S0", "VMSI1P0", "VMSI3P0", "VMSI3S0",
            "VMSI4S0", "VMSP1P0", "VMSP1S0", "VMSP2S0", "VMSP3P0", "VMSP3S0", "VMSP4S0",
            "VSG0000", "VSIC3P0", "VSIC4S0", "VSIF3P0", "VSIF3S0", "VSII3P0", "VSII4S0",
            "VSIP1P0", "VSIP1S0", "VSIP3P0", "VSIP3S0", "VSIS3P0", "VSIS3S0", "VSN0000",
            "VSP00SM", "VSSF3P0", "VSSF4S0", "VSSI3P0", "VSSI4S0", "VSSP3P0", "VSSP4S0", "W",
            "Z", "ZD", "ZP", "_" };

    String[] expectedDepTags = { "ADV", "ATR", "AUX", "BYAG", "COMP", "COMP-GAP", "COMPL",
            "CONJ", "COORD", "DO", "IO", "MIMPERS", "MOD", "MOD-GAP", "MPAS", "MPRON", "OBLC",
            "OPRD", "PP-DIR", "PP-LOC", "PRD", "PRDC", "ROOT", "SPEC", "SUBJ", "SUBJ-GAP", "_",
            "punct" };

    // Expected dependency relations, one rendered annotation per entry.
    String[] expectedDependencies = {
            "[ 0, 7]ROOT(ROOT,basic) D[0,7](Tenemos) G[0,7](Tenemos)",
            "[ 8, 10]Dependency(SPEC,basic) D[8,10](un) G[11,18](ejemplo)",
            "[ 11, 18]Dependency(SUBJ,basic) D[11,18](ejemplo) G[19,21](de)",
            "[ 19, 21]Dependency(MOD,basic) D[19,21](de) G[0,7](Tenemos)",
            "[ 22, 27]Dependency(MOD,basic) D[22,27](frase) G[32,43](complicado,)",
            "[ 28, 31]Dependency(SPEC,basic) D[28,31](muy) G[32,43](complicado,)",
            "[ 32, 43]Dependency(SUBJ,basic) D[32,43](complicado,) G[44,47](que)",
            "[ 44, 47]Dependency(DO,basic) D[44,47](que) G[19,21](de)",
            "[ 48, 56]Dependency(MOD,basic) D[48,56](contiene) G[57,63](tantas)",
            "[ 57, 63]Dependency(SUBJ,basic) D[57,63](tantas) G[78,90](dependencias)",
            "[ 64, 75]Dependency(punct,basic) D[64,75](componentes) G[57,63](tantas)",
            "[ 76, 77]Dependency(MOD,basic) D[76,77](y) G[78,90](dependencias)",
            "[ 78, 90]Dependency(MOD,basic) D[78,90](dependencias) G[0,7](Tenemos)",
            "[ 91, 95]Dependency(MOD,basic) D[91,95](como) G[96,99](sea)",
            "[ 96, 99]Dependency(SUBJ,basic) D[96,99](sea) G[100,107](posible)",
            "[100,107]Dependency(MOD,basic) D[100,107](posible) G[78,90](dependencias)",
            "[108,109]Dependency(punct,basic) D[108,109](.) G[100,107](posible)" };

    String text = "Tenemos un ejemplo de frase muy complicado, que "
            + "contiene tantas componentes y dependencias como sea posible .";
    JCas cas = runTest("dummy-es", "linear", text);

    assertDependencies(expectedDependencies, JCasUtil.select(cas, Dependency.class));

    assertTagset(MaltParser.class, POS.class, "freeling", expectedPosTags, cas);
    // FIXME the POS tagset mapping is not asserted yet:
    // assertTagsetMapping(POS.class, "freeling", new String[] {}, jcas);

    assertTagset(MaltParser.class, Dependency.class, "iula", expectedDepTags, cas);
    // FIXME the dependency tagset mapping is not asserted yet:
    // assertTagsetMapping(Dependency.class, "iula", new String[] {}, jcas);
}
/**
 * Checks the output for a German sentence with the {@code de} / {@code linear} configuration: the
 * dependencies, the POS tagset ({@code stts}) and the dependency tagset ({@code cdg}), and that
 * neither tagset has unmapped tags.
 * <p>
 * NOTE(review): unlike the English, Polish, Spanish, Swedish and Farsi tests in this class, the
 * expected dependency strings here carry no {@code ,basic} suffix, so they may be stale -
 * confirm against a run with the German model installed.
 *
 * @throws Exception
 *             if an error occurs.
 */
@Test
public void testGerman()
    throws Exception
{
    // Runs before the pipeline is set up. NOTE(review): presumably skips the test when the
    // requested model is not available - confirm in checkModel.
    checkModel("de", "linear");

    JCas jcas = runTest("de", "linear", "Wir brauchen ein sehr kompliziertes Beispiel , "
            + "welches möglichst viele Konstituenten und Dependenzen beinhaltet .");

    String[] dependencies = {
            "[ 4, 12]ROOT(ROOT) D[4,12](brauchen) G[0,3](Wir)",
            "[ 13, 16]Dependency(DET) D[13,16](ein) G[4,12](brauchen)",
            "[ 17, 21]Dependency(DET) D[17,21](sehr) G[13,16](ein)",
            "[ 22, 35]Dependency(DET) D[22,35](kompliziertes) G[36,44](Beispiel)",
            "[ 36, 44]Dependency(GMOD) D[36,44](Beispiel) G[17,21](sehr)",
            "[ 45, 46]Dependency(KON) D[45,46](,) G[4,12](brauchen)",
            "[ 47, 54]Dependency(DET) D[47,54](welches) G[45,46](,)",
            "[ 55, 64]Dependency(DET) D[55,64](möglichst) G[47,54](welches)",
            "[ 65, 70]Dependency(DET) D[65,70](viele) G[55,64](möglichst)",
            "[ 71, 84]Dependency(DET) D[71,84](Konstituenten) G[65,70](viele)",
            "[ 85, 88]Dependency(DET) D[85,88](und) G[71,84](Konstituenten)",
            "[ 89,100]Dependency(DET) D[89,100](Dependenzen) G[85,88](und)",
            "[101,111]Dependency(PN) D[101,111](beinhaltet) G[112,113](.)",
            "[112,113]Dependency(-PUNCT-) D[112,113](.) G[89,100](Dependenzen)" };

    String[] posTags = { "$(", "$,", "$.", "ADJA", "ADJD", "ADV", "APPO", "APPR",
            "APPRART", "APZR", "ART", "CARD", "FM", "ITJ", "KOKOM", "KON", "KOUI", "KOUS",
            "NE", "NN", "PDAT", "PDS", "PIAT", "PIDAT", "PIS", "PPER", "PPOSAT", "PPOSS",
            "PRELAT", "PRELS", "PRF", "PROP", "PTKA", "PTKANT", "PTKNEG", "PTKVZ", "PTKZU",
            "PWAT", "PWAV", "PWS", "TRUNC", "VAFIN", "VAIMP", "VAINF", "VAPP", "VMFIN",
            "VMINF", "VMPP", "VVFIN", "VVIMP", "VVINF", "VVIZU", "VVPP", "XY" };

    String[] depTags = { "-PUNCT-", "-UNKNOWN-", "ADV", "APP", "ATTR", "AUX",
            "AVZ", "CJ", "DET", "EXPL", "GMOD", "GRAD", "KOM", "KON", "KONJ", "NEB", "OBJA",
            "OBJC", "OBJD", "OBJG", "OBJI", "OBJP", "PAR", "PART", "PN", "PP", "PRED", "REL",
            "ROOT", "S", "SUBJ", "SUBJC", "ZEIT", "gmod-app", "koord" };

    assertDependencies(dependencies, JCasUtil.select(jcas, Dependency.class));

    // The expected tags are the same array that is asserted for MaltParser under "stts" on the
    // next line, so the tagger-side tagset is looked up as "stts" as well and not under the
    // Penn Treebank name "ptb".
    assertTagset(OpenNlpPosTagger.class, POS.class, "stts", posTags, jcas);
    assertTagset(MaltParser.class, POS.class, "stts", posTags, jcas);
    assertTagsetMapping(MaltParser.class, POS.class, "stts", new String[] {}, jcas);
    assertTagset(MaltParser.class, Dependency.class, "cdg", depTags, jcas);
    assertTagsetMapping(MaltParser.class, Dependency.class, "cdg", new String[] {}, jcas);
}
/**
 * Checks the dependencies and the POS/dependency tagsets obtained for a Swedish sentence with the
 * {@code sv} / {@code linear} configuration.
 *
 * @throws Exception
 *             if an error occurs.
 */
@Test
public void testSwedish()
    throws Exception
{
    // Expected tag inventories.
    String[] expectedPosTags = { "AB", "DT", "HA", "HD", "HP", "HS", "IE", "IN", "JJ",
            "KN", "MAD", "MID", "NN", "PAD", "PC", "PL", "PM", "PN", "PP", "PS", "RG", "RO",
            "SN", "UO", "VB" };

    String[] expectedDepTags = { "+A", "+F", "AA", "AG", "AN", "AT", "CA", "CJ", "DB",
            "DT", "EF", "EO", "ES", "ET", "FO", "FP", "FS", "FV", "HA", "HD", "I?", "IC", "IF",
            "IG", "IK", "IO", "IP", "IQ", "IR", "IS", "IT", "IU", "JC", "JG", "JR", "JT", "KA",
            "MA", "MS", "NA", "OA", "OO", "OP", "PA", "PL", "PT", "RA", "ROOT", "SP", "SS",
            "TA", "UA", "VA", "VG", "VO", "VS", "XA", "XF", "XT", "XX", "YY" };

    // Expected dependency relations, one rendered annotation per entry.
    String[] expectedDependencies = {
            "[ 0, 2]ROOT(ROOT,basic) D[0,2](Vi) G[0,2](Vi)",
            "[ 3, 10]Dependency(HD,basic) D[3,10](behöver) G[0,2](Vi)",
            "[ 11, 13]Dependency(HD,basic) D[11,13](en) G[3,10](behöver)",
            "[ 14, 20]Dependency(HD,basic) D[14,20](mycket) G[11,13](en)",
            "[ 21, 32]Dependency(HD,basic) D[21,32](komplicerad) G[14,20](mycket)",
            "[ 33, 40]Dependency(HD,basic) D[33,40](exempel) G[21,32](komplicerad)",
            "[ 41, 49]Dependency(HD,basic) D[41,49](meningen) G[0,2](Vi)",
            "[ 50, 53]Dependency(HD,basic) D[50,53](som) G[41,49](meningen)",
            "[ 54, 64]Dependency(HD,basic) D[54,64](innehåller) G[41,49](meningen)",
            "[ 65, 69]Dependency(HD,basic) D[65,69](lika) G[41,49](meningen)",
            "[ 70, 75]Dependency(HD,basic) D[70,75](många) G[41,49](meningen)",
            "[ 76, 89]Dependency(HD,basic) D[76,89](beståndsdelar) G[33,40](exempel)",
            "[ 90, 93]Dependency(HD,basic) D[90,93](och) G[76,89](beståndsdelar)",
            "[ 94,103]Dependency(+F,basic) D[94,103](beroenden) G[90,93](och)",
            "[104,107]Dependency(HD,basic) D[104,107](som) G[94,103](beroenden)",
            "[108,116]Dependency(HD,basic) D[108,116](möjligt.) G[0,2](Vi)" };

    String text = "Vi behöver en mycket komplicerad exempel meningen som "
            + "innehåller lika många beståndsdelar och beroenden som möjligt.";
    JCas cas = runTest("sv", "linear", text);

    assertDependencies(expectedDependencies, JCasUtil.select(cas, Dependency.class));

    assertTagset(MaltParser.class, POS.class, "suc", expectedPosTags, cas);
    // FIXME the POS tagset mapping is not asserted yet:
    // assertTagsetMapping(POS.class, "stb", new String[] {}, jcas);

    assertTagset(MaltParser.class, Dependency.class, "stb", expectedDepTags, cas);
    // FIXME the dependency tagset mapping is not asserted yet:
    // assertTagsetMapping(Dependency.class, "stb", new String[] {}, jcas);
}
@Test
public void testFarsi()
throws Exception
{
JCas jcas = runTest(
"fa",
"linear",
"ما به عنوان مثال جمله بسیار پیچیده، که شامل به عنوان بسیاری از مولفه ها و وابستگی ها که ممکن است نیاز دارید .");
String[] dependencies = {
"[ 0, 2]Dependency(nsubj,basic) D[0,2](ما) G[102,107](دارید)",
"[ 3, 5]Dependency(prep,basic) D[3,5](به) G[102,107](دارید)",
"[ 6, 11]Dependency(pobj,basic) D[6,11](عنوان) G[3,5](به)",
"[ 12, 16]Dependency(nn,basic) D[12,16](مثال) G[6,11](عنوان)",
"[ 17, 21]Dependency(dobj,basic) D[17,21](جمله) G[102,107](دارید)",
"[ 22, 27]Dependency(advmod,basic) D[22,27](بسیار) G[28,35](پیچیده،)",
"[ 28, 35]Dependency(amod,basic) D[28,35](پیچیده،) G[17,21](جمله)",
"[ 36, 38]Dependency(rel,basic) D[36,38](که) G[39,43](شامل)",
"[ 39, 43]Dependency(rcmod,basic) D[39,43](شامل) G[17,21](جمله)",
"[ 44, 46]Dependency(prep,basic) D[44,46](به) G[102,107](دارید)",
"[ 47, 52]Dependency(pobj,basic) D[47,52](عنوان) G[44,46](به)",
"[ 53, 59]Dependency(amod,basic) D[53,59](بسیاری) G[47,52](عنوان)",
"[ 60, 62]Dependency(prep,basic) D[60,62](از) G[53,59](بسیاری)",
"[ 63, 68]Dependency(pobj,basic) D[63,68](مولفه) G[60,62](از)",
"[ 69, 71]Dependency(poss,basic) D[69,71](ها) G[63,68](مولفه)",
"[ 72, 73]Dependency(cc,basic) D[72,73](و) G[69,71](ها)",
"[ 74, 81]Dependency(conj,basic) D[74,81](وابستگی) G[69,71](ها)",
"[ 82, 84]Dependency(poss,basic) D[82,84](ها) G[74,81](وابستگی)",
"[ 85, 87]Dependency(rel,basic) D[85,87](که) G[88,92](ممکن)",
"[ 88, 92]Dependency(rcmod,basic) D[88,92](ممکن) G[82,84](ها)",
"[ 93, 96]Dependency(cop,basic) D[93,96](است) G[88,92](ممکن)",
"[ 97,101]Dependency(dobj-lvc,basic) D[97,101](نیاز) G[102,107](دارید)",
"[102,107]ROOT(ROOT,basic) D[102,107](دارید) G[102,107](دارید)",
"[108,109]Dependency(punct,basic) D[108,109](.) G[102,107](دارید)" };
String[] posTags = { "ADJ", "ADJ_CMPR", "ADJ_INO", "ADJ_SUP", "ADV",
"ADV_COMP", "ADV_I", "ADV_LOC", "ADV_NEG", "ADV_TIME", "CLITIC", "CON", "DELM",
"DET", "FW", "INT", "NUM", "N_PL", "N_SING", "N_VOC", "P", "PREV", "PRO", "V_AUX",
"V_COP", "V_IMP", "V_PA", "V_PP", "V_PRS", "V_SUB" };
String[] depTags = { "acc", "acomp", "acomp-lvc", "acomp-lvc/pc", "acomp/pc",
"advcl", "advcl/cop", "advcl/pc", "advmod", "advmod/pc", "amod", "amod/cop",
"amod/pc", "appos", "appos/pc", "aux", "auxpass", "cc", "ccomp", "ccomp/cop",
"ccomp/pc", "ccomp/pc\\cop", "ccomp\\cpobj", "ccomp\\nsubj", "ccomp\\pobj",
"ccomp\\poss", "complm", "conj", "conj/cop", "conj/pc", "conj\\pobj", "conj\\poss",
"cop", "cop/pc", "cpobj", "cpobj/pc", "cprep", "dep", "dep-top", "dep-top/pc",
"dep-voc", "dep/pc", "det", "dobj", "dobj-lvc", "dobj-lvc/pc", "dobj/acc",
"dobj/pc", "dobj/pc-lvc", "fw", "mark", "mwe", "mwe/pc", "neg", "nn", "nn/cop",
"npadvmod", "nsubj", "nsubj-lvc", "nsubj/pc", "nsubjpass", "nsubjpass/pc", "num",
"number", "parataxis", "parataxis/cop", "parataxis/pc", "pobj", "pobj/cop",
"pobj/pc", "pobj\\cop", "poss", "poss/acc", "poss/cop", "poss/pc", "preconj",
"predet", "prep", "prep-lvc", "prep/det", "prep/pc", "prep/pobj", "prt", "punct",
"quantmod", "rcmod", "rcmod/cop", "rcmod/pc", "rcmod\\amod", "rcmod\\pobj",
"rcmod\\poss", "rel", "root", "root/cop", "root/pc", "root\\amod", "root\\conj",
"root\\pobj", "root\\poss", "tmod", "xcomp" };
String[] unmappedPos = {};
assertDependencies(dependencies, JCasUtil.select(jcas, Dependency.class));
assertTagset(MaltParser.class, POS.class, "upc", posTags, jcas);
assertTagsetMapping(POS.class, "upc", unmappedPos, jcas);
assertTagset(MaltParser.class, Dependency.class, "updt", depTags, jcas);
// FIXME assertTagsetMapping(Dependency.class, "updt", new String[] {},
// jcas);
}
/**
 * Checks the POS and dependency tagsets reported for the French model and verifies that
 * dependency annotations are created at all. The pipeline is requested for the
 * pseudo-language {@code dummy-fr}, i.e. we currently do not have a POS tagger for French, so
 * the expected dependencies below are bogus from a linguistic point of view.
 * <p>
 * The test is currently disabled; the reason is given in the {@link Ignore} message.
 *
 * @throws Exception
 *             if an error occurs.
 */
@Test
@Ignore("The tags produced by our French TreeTagger model are different form the ones that "
        + "the pre-trained MaltParser model expects. Also the input format in our MaltParser "
        + "class is currently hardcoded to the format used by the English pre-trained model. "
        + "For the French model the 5th column of the input format should contain fine-grained "
        + "tags. See http://www.maltparser.org/mco/french_parser/fremalt.html")
public void testFrench()
    throws Exception
{
    // Expected dependency annotations, one entry per token of the input text.
    String[] expectedDependencies = new String[] {
            "[ 0, 4]ROOT(ROOT) D[0,4](Nous) G[0,4](Nous)",
            "[ 5, 10]ROOT(ROOT) D[5,10](avons) G[5,10](avons)",
            "[ 11, 17]ROOT(ROOT) D[11,17](besoin) G[11,17](besoin)",
            "[ 18, 23]ROOT(ROOT) D[18,23](d'une) G[18,23](d'une)",
            "[ 24, 30]ROOT(ROOT) D[24,30](phrase) G[24,30](phrase)",
            "[ 31, 34]ROOT(ROOT) D[31,34](par) G[31,34](par)",
            "[ 35, 42]ROOT(ROOT) D[35,42](exemple) G[35,42](exemple)",
            "[ 43, 47]ROOT(ROOT) D[43,47](très) G[43,47](très)",
            "[ 48, 58]ROOT(ROOT) D[48,58](compliqué,) G[48,58](compliqué,)",
            "[ 59, 62]ROOT(ROOT) D[59,62](qui) G[59,62](qui)",
            "[ 63, 71]ROOT(ROOT) D[63,71](contient) G[63,71](contient)",
            "[ 72, 75]ROOT(ROOT) D[72,75](des) G[72,75](des)",
            "[ 76, 88]ROOT(ROOT) D[76,88](constituants) G[76,88](constituants)",
            "[ 89, 92]ROOT(ROOT) D[89,92](que) G[89,92](que)",
            "[ 93, 95]ROOT(ROOT) D[93,95](de) G[93,95](de)",
            "[ 96,106]Dependency(det) D[96,106](nombreuses) G[107,118](dépendances)",
            "[107,118]Dependency(obj) D[107,118](dépendances) G[93,95](de)",
            "[119,121]Dependency(dep) D[119,121](et) G[107,118](dépendances)",
            "[122,125]Dependency(dep) D[122,125](que) G[107,118](dépendances)",
            "[126,135]Dependency(dep) D[126,135](possible.) G[107,118](dépendances)"
    };

    // Expected content of the "melt" POS tagset.
    String[] expectedPosTags = new String[] {
            "/CC", "/P", "/PONCT", "4/DET", "ADJ", "ADJWH", "ADV", "ADVWH",
            "CC", "CLO", "CLR", "CLS", "CS", "DET", "DETWH", "ET",
            "I", "NC", "NPP", "P", "P+D", "P+PRO", "PONCT", "PREF",
            "PRO", "PROREL", "PROWH", "V", "VIMP", "VINF", "VPP", "VPR",
            "VS", "_9/NC", "_OPE/NC", "_S/ET", "_S/NPP", "_an/NC", "_h/NC"
    };

    // Expected content of the "ftb" dependency tagset.
    String[] expectedDepTags = new String[] {
            "a_obj", "aff", "arg", "ato", "ats", "aux_caus", "aux_pass", "aux_tps",
            "comp", "coord", "de_obj", "dep", "dep_coord", "det", "missinghead", "mod",
            "mod_rel", "obj", "obj1", "p_obj", "ponct", "root", "suj"
    };

    // POS tags passed to the mapping assertion as the unmapped ones.
    String[] unmappedPosTags = new String[] {
            "/CC", "/P", "/PONCT", "4/DET", "_9/NC",
            "_OPE/NC", "_S/ET", "_S/NPP", "_an/NC", "_h/NC"
    };

    String text = "Nous avons besoin d'une phrase par exemple très "
            + "compliqué, qui contient des constituants que de nombreuses dépendances et que "
            + "possible.";

    JCas cas = runTest("dummy-fr", "linear", text);

    assertDependencies(expectedDependencies, JCasUtil.select(cas, Dependency.class));
    assertTagset(MaltParser.class, POS.class, "melt", expectedPosTags, cas);
    assertTagsetMapping(MaltParser.class, POS.class, "melt", unmappedPosTags, cas);
    assertTagset(MaltParser.class, Dependency.class, "ftb", expectedDepTags, cas);
    // FIXME The dependency tagset mapping check is still disabled:
    // assertTagsetMapping(Dependency.class, "ftb", new String[] {}, cas);
}
/**
 * Builds the tagger/parser pipeline for the given language and variant and runs it on the
 * given text.
 * <p>
 * The language is handed to {@link #getEngines(String, String)} unchanged, but a leading
 * {@code "dummy-"} is removed before the language is passed on to {@link TestRunner}.
 *
 * @param aLanguage
 *            the language code, optionally prefixed with {@code "dummy-"}.
 * @param aVariant
 *            the parser model variant.
 * @param aText
 *            the text to process.
 * @return the JCas returned by the test runner.
 * @throws Exception
 *             if building or running the pipeline fails.
 */
private JCas runTest(String aLanguage, String aVariant, String aText)
    throws Exception
{
    final String dummyPrefix = "dummy-";

    AnalysisEngineDescription pipeline = getEngines(aLanguage, aVariant);

    String documentLanguage = aLanguage.startsWith(dummyPrefix)
            ? aLanguage.substring(dummyPrefix.length())
            : aLanguage;

    return TestRunner.runTest(pipeline, documentLanguage, aText);
}
/**
 * Creates an aggregate description consisting of a POS tagger followed by the MaltParser.
 * <p>
 * Tagger selection:
 * <ul>
 * <li>language starting with {@code "dummy-"}: OpenNLP tagger with its language fixed to
 * {@code "en"} (used when there is no proper tagger for the language),</li>
 * <li>{@code "fa"} or {@code "sv"}: HunPos tagger,</li>
 * <li>anything else: OpenNLP tagger without an explicit language.</li>
 * </ul>
 * The parser is configured with the given variant, tagset printing enabled and missing
 * features ignored.
 *
 * @param aLanguage
 *            the language code, optionally prefixed with {@code "dummy-"}.
 * @param aVariant
 *            the parser model variant.
 * @return the aggregate engine description.
 * @throws ResourceInitializationException
 *             if one of the descriptions cannot be created.
 */
public static AnalysisEngineDescription getEngines(String aLanguage, String aVariant)
    throws ResourceInitializationException
{
    AnalysisEngineDescription tagger;
    if (aLanguage.startsWith("dummy-")) {
        // No proper tagger for this language: fall back to the English OpenNLP model.
        tagger = createEngineDescription(OpenNlpPosTagger.class,
                OpenNlpPosTagger.PARAM_LANGUAGE, "en");
    }
    else if ("fa".equals(aLanguage) || "sv".equals(aLanguage)) {
        tagger = createEngineDescription(HunPosTagger.class);
    }
    else {
        tagger = createEngineDescription(OpenNlpPosTagger.class);
    }

    AnalysisEngineDescription parser = createEngineDescription(MaltParser.class,
            MaltParser.PARAM_VARIANT, aVariant,
            MaltParser.PARAM_PRINT_TAGSET, true,
            MaltParser.PARAM_IGNORE_MISSING_FEATURES, true);

    List<AnalysisEngineDescription> pipeline = new ArrayList<AnalysisEngineDescription>();
    pipeline.add(tagger);
    pipeline.add(parser);

    AnalysisEngineDescription[] components = pipeline
            .toArray(new AnalysisEngineDescription[pipeline.size()]);
    return createEngineDescription(components);
}
/**
 * States a JUnit assumption that the parser model for the given language and variant can be
 * found as a classpath resource.
 *
 * @param aLanguage
 *            the language code used in the model file name.
 * @param aVariant
 *            the variant used in the model file name.
 */
private void checkModel(String aLanguage, String aVariant)
{
    String modelLocation = "/de/tudarmstadt/ukp/dkpro/core/maltparser/lib/parser-" + aLanguage
            + "-" + aVariant + ".mco";
    boolean modelOnClasspath = getClass().getResource(modelLocation) != null;
    Assume.assumeTrue(modelOnClasspath);
}
/**
 * DKPro test context, registered as a JUnit rule for the tests in this class.
 */
@Rule
public DkproTestContext testContext = new DkproTestContext();
}