/*
* Copyright 2012
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.mstparser;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.util.JCasUtil.select;
import org.apache.commons.lang.ArrayUtils;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.jcas.JCas;
import org.junit.Assume;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import de.tudarmstadt.ukp.dkpro.core.hunpos.HunPosTagger;
import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations;
import de.tudarmstadt.ukp.dkpro.core.testing.AssumeResource;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner;
/**
*/
public class MstParserTest
{
@Ignore("Takes too long")
@Test
public void testCroatianMte5Defnpout()
throws Exception
{
JCas jcas = runTest("hr", "mte5.defnpout", "Moramo vrlo kompliciran primjer rečenicu , "
+ "koja sadrži što više sastojaka i ovisnosti što je više moguće .");
String[] dependencies = {
"[ 0, 6]Dependency(Pred,basic) D[0,6](Moramo) G[41,42](,)",
"[ 7, 11]Dependency(Adv,basic) D[7,11](vrlo) G[12,23](kompliciran)",
"[ 12, 23]Dependency(Atr,basic) D[12,23](kompliciran) G[24,31](primjer)",
"[ 24, 31]Dependency(Ap,basic) D[24,31](primjer) G[32,40](rečenicu)",
"[ 32, 40]Dependency(Sb,basic) D[32,40](rečenicu) G[0,6](Moramo)",
"[ 41, 42]Dependency(Punc,basic) D[41,42](,) G[48,54](sadrži)",
"[ 43, 47]Dependency(Sb,basic) D[43,47](koja) G[48,54](sadrži)",
"[ 48, 54]ROOT(Pred,basic) D[48,54](sadrži) G[48,54](sadrži)",
"[ 55, 58]Dependency(Pred,basic) D[55,58](što) G[74,75](i)",
"[ 59, 63]Dependency(Oth,basic) D[59,63](više) G[74,75](i)",
"[ 64, 73]Dependency(Atr,basic) D[64,73](sastojaka) G[59,63](više)",
"[ 74, 75]Dependency(Co,basic) D[74,75](i) G[48,54](sadrži)",
"[ 76, 85]Dependency(Pred,basic) D[76,85](ovisnosti) G[74,75](i)",
"[ 86, 89]ROOT(Pred,basic) D[86,89](što) G[86,89](što)",
"[ 90, 92]ROOT(Pred,basic) D[90,92](je) G[90,92](je)",
"[ 93, 97]Dependency(Adv,basic) D[93,97](više) G[98,104](moguće)",
"[ 98,104]Dependency(Pnom,basic) D[98,104](moguće) G[90,92](je)",
"[105,106]ROOT(Punc,basic) D[105,106](.) G[105,106](.)" };
String[] posTags = { "<root-POS>", "Afmnpa-", "Afpfsn-", "Afpmpgy", "Afpmply",
"Afpnpgy", "Afpnpn-", "Agcfpn", "Agcfsa", "Agcfsg", "Agcfsi", "Agcfsn", "Agcmpa",
"Agcmpg", "Agcmpn", "Agcmsg", "Agcmsl", "Agcmsn", "Agcnsa", "Agcnsn", "Agpfpa",
"Agpfpd", "Agpfpg", "Agpfpi", "Agpfpl", "Agpfpn", "Agpfsa", "Agpfsd", "Agpfsg",
"Agpfsi", "Agpfsl", "Agpfsn", "Agpmpa", "Agpmpd", "Agpmpg", "Agpmpi", "Agpmpl",
"Agpmpn", "Agpmsan", "Agpmsay", "Agpmsd", "Agpmsg", "Agpmsi", "Agpmsl", "Agpmsn",
"Agpngs", "Agpnpa", "Agpnpg", "Agpnpi", "Agpnpl", "Agpnpn", "Agpnsa", "Agpnsd",
"Agpnsg", "Agpnsl", "Agpnsn", "Agsfpa", "Agsfpg", "Agsfpn", "Agsfsa", "Agsfsg",
"Agsfsi", "Agsfsl", "Agsfsn", "Agsmpa", "Agsmpg", "Agsmpn", "Agsmsan", "Agsmsd",
"Agsmsn", "Agsnpg", "Agsnsn", "Appfpg", "Appfpl", "Appfpn", "Appfsa", "Appfsg",
"Appfsl", "Appfsn", "Appmpa", "Appmpd", "Appmpg", "Appmpi", "Appmpn", "Appmsan",
"Appmsay", "Appmsd", "Appmsg", "Appmsi", "Appmsl", "Appmsn", "Appnpa", "Appnpn",
"Appnsg", "Appnsl", "Appnsn", "Apsfsg", "Aspfpn", "Aspfsl", "Aspfsn", "Aspmsd",
"Aspmsn", "Aspnsa", "Aspnsg", "Cc", "Ccs", "Cs", "Css", "M", "Mc-p-l", "Mc-pal",
"Mc-pgl", "Mc-s-l", "Mcfp-l", "Mcfpal", "Mcfpgl", "Mcfpnl", "Mcfsal-", "Mcfsgl-",
"Mcfsll-", "Mcfsnl-", "Mcmpal", "Mcmpan", "Mcmpgl", "Mcmpnl", "Mcmsal",
"Mcmsal---n", "Mcmsal---y", "Mcmsgl", "Mcmsil-", "Mcmsll", "Mcmsnl", "Mcnpnl",
"Mcnsal-", "Mcnsnl-", "Ms-s-l", "Msfpgl", "Msfpnl", "N--pg", "N--pn", "N-fpa",
"N-fpd", "N-fpg", "N-fpi", "N-fpl", "N-fpn", "N-fsa", "N-fsd", "N-fsg", "N-fsi",
"N-fsl", "N-fsn", "N-mpa", "N-mpd", "N-mpg", "N-mpi", "N-mpl", "N-mpn", "N-msa",
"N-msan", "N-msay", "N-msd", "N-msg", "N-msi", "N-msl", "N-msn", "N-msv", "N-npa",
"N-npd", "N-npg", "N-npi", "N-npl", "N-npn", "N-nsa", "N-nsd", "N-nsg", "N-nsi",
"N-nsl", "N-nsn", "Ncfpn", "Ncfsg", "Ncfsl", "Ncfsn", "Ncmpa", "Ncmpg", "Ncmpl",
"Ncmpn", "Ncmsg", "Ncmsi", "Ncmsl", "Ncmsn", "Np-si", "Np-sn", "Npmsi", "Npmsn",
"Pd-fpa--n-a--", "Pd-fpg--n-a--", "Pd-fpn--n-a--", "Pd-fsa--n-a--",
"Pd-fsg--n-a--", "Pd-fsi--n-a--", "Pd-fsl--n-a--", "Pd-fsn--n-a--",
"Pd-mpa--n-a--", "Pd-mpg--n-a--", "Pd-mpi--n-a--", "Pd-mpn--n-a--",
"Pd-msa--n-a-n", "Pd-msd--n-a--", "Pd-msg--n-a--", "Pd-msi--n-a--",
"Pd-msl--n-a--", "Pd-msn--n-a--", "Pd-npa--n-a--", "Pd-npi--n-a--",
"Pd-nsa--n-a--", "Pd-nsg--n-a--", "Pd-nsi--n-a--", "Pd-nsl--n-a--",
"Pd-nsn--n-a--", "Pi-fpa--n-a--", "Pi-fpd--n-a--", "Pi-fpg--n-a--",
"Pi-fpi--n-a--", "Pi-fpl--n-a--", "Pi-fpn--n-a--", "Pi-fsa--n-a--",
"Pi-fsg--n-a--", "Pi-fsi--n-a--", "Pi-fsl--n-a--", "Pi-fsn--n-a--",
"Pi-mpa--n-a--", "Pi-mpd--n-a--", "Pi-mpg--n-a--", "Pi-mpi--n-a--",
"Pi-mpl--n-a--", "Pi-mpn--n-a--", "Pi-msa--n-a--", "Pi-msa--n-a-n",
"Pi-msa--n-a-y", "Pi-msd--n-a--", "Pi-msg--n-a--", "Pi-msi--n-a--",
"Pi-msl--n-a--", "Pi-msn--n-a--", "Pi-npa--n-a--", "Pi-npi--n-a--",
"Pi-npl--n-a--", "Pi-npn--n-a--", "Pi-nsa--n-a--", "Pi-nsd--n-a--",
"Pi-nsg--n-a--", "Pi-nsi--n-a--", "Pi-nsl--n-a--", "Pi-nsn--n-a--",
"Pi3m-a--n-n-y", "Pi3m-d--n-n-y", "Pi3m-n--n-n-y", "Pi3n-a--n-n-n",
"Pi3n-g--n-n-n", "Pi3n-i--n-n-n", "Pi3n-i--y-n-n", "Pi3n-n--n-n-n", "Pi3nsn----a",
"Pp1-pa--n-n--", "Pp1-pd--y-n--", "Pp1-pn--n-n--", "Pp1-sa--n-n--",
"Pp1-sa--y-n--", "Pp1-sd--y-n--", "Pp1-sn--n-n--", "Pp2-pd--y-n--",
"Pp3-pa--y-n--", "Pp3-pd--y-n--", "Pp3-pg--n-n--", "Pp3-pg--y-n--",
"Pp3fsa--y-n--", "Pp3fsd--y-n--", "Pp3fsi--n-n--", "Pp3fsn--n-n--",
"Pp3mpn--n-n--", "Pp3msa--n-n--", "Pp3msa--y-n--", "Pp3msd--n-n--",
"Pp3msd--y-n--", "Pp3msg--n-n--", "Pp3msi--n-n--", "Pp3msn--n-n--",
"Pp3npn--n-n--", "Pp3nsn--n-n--", "Ps1fpgp-n-a--", "Ps1fsgp-n-a--",
"Ps1mpgp-n-a--", "Ps1msnp-n-a--", "Ps1msns-n-a--", "Ps1nsnp-n-a--",
"Ps3fpap-n-a--", "Ps3fpgsfn-a--", "Ps3fpnsmn-a--", "Ps3fsgsmn-a--",
"Ps3fsnsfn-a--", "Ps3fsnsmn-a--", "Ps3mpasmn-a--", "Ps3mpgsfn-a--",
"Ps3mpgsnn-a--", "Ps3mpnp-n-a--", "Ps3msgsmn-a--", "Ps3mslsmn-a--",
"Ps3mslsnn-a--", "Ps3msnp-n-a--", "Ps3msnsfn-a--", "Ps3msnsmn-a--",
"Ps3npgsmn-a--", "Ps3nplsmn-a--", "Ps3nsisfn-a--", "Ps3nsnsfn-a--", "Px--sa--ypn-",
"Px--sa--ypn--", "Px--sd--ypn--", "Px-fpa--nsa--", "Px-fpg--nsa--",
"Px-fsa--nsa--", "Px-fsg--nsa--", "Px-fsl--nsa--", "Px-mpa--nsa--",
"Px-mpl--nsa--", "Px-msa--nsa-n", "Px-msg--nsa--", "Px-msi--nsa--",
"Px-msl--nsa--", "Px-nsa--nsa--", "Qo", "Qq", "Qr", "Qz", "Rgc", "Rgp", "Rgs",
"Rl", "Rlp", "Rnp", "Rp", "Rs", "Rt", "Rtp", "Sa", "Sd", "Sg", "Si", "Sl", "Spsa",
"Spsg", "Spsi", "Spsl", "Var1p", "Var1s", "Var2p", "Var3p", "Var3p-y", "Var3s",
"Var3s-y", "Vca1s", "Vca2s", "Vca3p", "Vca3s", "Vcia3s", "Vcip3p", "Vcip3s", "Vcn",
"Vcp-pf", "Vcp-pm", "Vcp-pn", "Vcp-sf", "Vcp-sm", "Vcp-sn", "Vcpp", "Vcps-sna",
"Vcr1p", "Vcr1p-y", "Vcr1s", "Vcr2p", "Vcr3p", "Vcr3p-y", "Vcr3s", "Vcr3s-y",
"Vma3s", "Vmip3p", "Vmip3s", "Vmm1p", "Vmm2p", "Vmm2s", "Vmn", "Vmp-pf", "Vmp-pm",
"Vmp-pn", "Vmp-sf", "Vmp-sm", "Vmp-sn", "Vmps-pma", "Vmps-sma", "Vmps-snp",
"Vmr1p", "Vmr1s", "Vmr2p", "Vmr3p", "Vmr3s", "Vmr3s-y", "X", "Y", "Yn--n", "Yn-s-",
"Yn-sl", "Yn-sn", "Ynfpg", "Ynfsa", "Ynfsd", "Ynfsg", "Ynfsl", "Ynfsn", "Ynmpg",
"Ynmpn", "Ynmsa", "Ynmsd", "Ynmsg", "Ynmsi", "Ynmsl", "Ynmsn", "Z" };
//String[] unmappedPosTags = { "$", "''", "-LRB-", "-RRB-", "<root-POS>", "``" };
String[] depTags = { "<no-type>", "Adv", "Ap", "Atr", "Atv", "Aux", "Co",
"Elp", "Obj", "Oth", "Pnom", "Pred", "Prep", "Punc", "Sb", "Sub" };
String[] posOrig = { "Vmr1p", "Rgp", "Agpmsn", "N-msn", "N-msn", "Z", "Pi-fsn--n-a",
"Vmr3s", "Pi3n-a--n-nn", "Sg", "N-mpg", "Cc", "Vmn", "Pi3n-n--n-nn", "Vcr3s", "Rgc",
"Agpnsn", "Z" };
String[] posMapped = { "POS", "POS", "POS", "POS", "POS", "POS", "POS", "POS", "POS", "POS",
"POS", "POS", "POS", "POS", "POS", "POS", "POS", "POS" };
AssertAnnotations.assertPOS(posMapped, posOrig, select(jcas, POS.class));
AssertAnnotations.assertDependencies(dependencies, select(jcas, Dependency.class));
AssertAnnotations.assertTagset(MstParser.class, POS.class, "mte5-reduced", posTags, jcas);
//AssertAnnotations.assertTagsetMapping(POS.class, "mte5", unmappedPosTags, jcas);
AssertAnnotations.assertTagset(MstParser.class, Dependency.class, "setimes.hr", depTags, jcas);
}
/**
* The POS tags produced by Hunpos are MSD tags. This model here only uses the first character
* of these MSD tags. Thus, we have a tag mismatch and the results here are completely bogus.
*
* @throws Exception
* if an error occurs.
*/
@Ignore("Takes too long")
@Test
public void testCroatianMte5Pos()
throws Exception
{
JCas jcas = runTest("hr", "mte5.pos", "Moramo vrlo kompliciran primjer rečenicu , "
+ "koja sadrži što više sastojaka i ovisnosti što je više moguće .");
String[] dependencies = {
"[ 0, 6]Dependency(Oth,basic) D[0,6](Moramo) G[12,23](kompliciran)",
"[ 7, 11]Dependency(Oth,basic) D[7,11](vrlo) G[12,23](kompliciran)",
"[ 12, 23]Dependency(Oth,basic) D[12,23](kompliciran) G[24,31](primjer)",
"[ 24, 31]Dependency(Oth,basic) D[24,31](primjer) G[32,40](rečenicu)",
"[ 32, 40]Dependency(Punc,basic) D[32,40](rečenicu) G[41,42](,)",
"[ 41, 42]Dependency(Punc,basic) D[41,42](,) G[48,54](sadrži)",
"[ 43, 47]Dependency(Oth,basic) D[43,47](koja) G[48,54](sadrži)",
"[ 48, 54]Dependency(Oth,basic) D[48,54](sadrži) G[74,75](i)",
"[ 55, 58]Dependency(Oth,basic) D[55,58](što) G[74,75](i)",
"[ 59, 63]Dependency(Atr,basic) D[59,63](više) G[74,75](i)",
"[ 64, 73]Dependency(Oth,basic) D[64,73](sastojaka) G[59,63](više)",
"[ 74, 75]ROOT(Co,basic) D[74,75](i) G[74,75](i)",
"[ 76, 85]Dependency(Oth,basic) D[76,85](ovisnosti) G[98,104](moguće)",
"[ 86, 89]Dependency(Oth,basic) D[86,89](što) G[98,104](moguće)",
"[ 90, 92]Dependency(Oth,basic) D[90,92](je) G[98,104](moguće)",
"[ 93, 97]Dependency(Oth,basic) D[93,97](više) G[98,104](moguće)",
"[ 98,104]Dependency(Punc,basic) D[98,104](moguće) G[105,106](.)",
"[105,106]ROOT(Punc,basic) D[105,106](.) G[105,106](.)" };
String[] posTags = { "<root-POS>", "A", "C", "M", "N", "P", "Q", "R", "S",
"V", "X", "Y", "Z" };
//String[] unmappedPosTags = { "$", "''", "-LRB-", "-RRB-", "<root-POS>", "``" };
String[] depTags = { "<no-type>", "Adv", "Ap", "Atr", "Atv", "Aux", "Co",
"Elp", "Obj", "Oth", "Pnom", "Pred", "Prep", "Punc", "Sb", "Sub" };
String[] posOrig = { "Vmr1p", "Rgp", "Agpmsn", "N-msn", "N-msn", "Z", "Pi-fsn--n-a",
"Vmr3s", "Pi3n-a--n-nn", "Sg", "N-mpg", "Cc", "Vmn", "Pi3n-n--n-nn", "Vcr3s", "Rgc",
"Agpnsn", "Z" };
String[] posMapped = { "POS", "POS", "POS", "POS", "POS", "POS", "POS", "POS", "POS", "POS",
"POS", "POS", "POS", "POS", "POS", "POS", "POS", "POS" };
AssertAnnotations.assertPOS(posMapped, posOrig, select(jcas, POS.class));
AssertAnnotations.assertDependencies(dependencies, select(jcas, Dependency.class));
AssertAnnotations.assertTagset(MstParser.class, POS.class, "mte5-pos", posTags, jcas);
//AssertAnnotations.assertTagsetMapping(POS.class, "mte5", unmappedPosTags, jcas);
AssertAnnotations.assertTagset(MstParser.class, Dependency.class, "setimes.hr", depTags, jcas);
}
/**
* This method runs the MSTParser for an example sentence and checks if it returns the correct
* annotations. An annotation consists of: dependency type, begin of dependency, end of
* dependency, begin of the head, end of the head
*
* @throws Exception
* if an error occurs.
*/
@Test
public void testEnglishDefault()
throws Exception
{
System.out.printf("Maximum memory: %d%n", Runtime.getRuntime().maxMemory());
Assume.assumeTrue(Runtime.getRuntime().maxMemory() > 3000000000l);
JCas jcas = runTest("en", null, "We need a very complicated example sentence , which " +
"contains as many constituents and dependencies as possible .");
String[] dependencies = {
"[ 0, 2]Dependency(nsubj,basic) D[0,2](We) G[3,7](need)",
"[ 3, 7]ROOT(null,basic) D[3,7](need) G[3,7](need)",
"[ 8, 9]Dependency(det,basic) D[8,9](a) G[35,43](sentence)",
"[ 10, 14]Dependency(advmod,basic) D[10,14](very) G[15,26](complicated)",
"[ 15, 26]Dependency(amod,basic) D[15,26](complicated) G[35,43](sentence)",
"[ 27, 34]Dependency(nn,basic) D[27,34](example) G[35,43](sentence)",
"[ 35, 43]Dependency(dobj,basic) D[35,43](sentence) G[3,7](need)",
"[ 44, 45]Dependency(punct,basic) D[44,45](,) G[35,43](sentence)",
"[ 46, 51]Dependency(nsubj,basic) D[46,51](which) G[52,60](contains)",
"[ 52, 60]Dependency(rcmod,basic) D[52,60](contains) G[35,43](sentence)",
"[ 61, 63]Dependency(prep,basic) D[61,63](as) G[52,60](contains)",
"[ 64, 68]Dependency(amod,basic) D[64,68](many) G[69,81](constituents)",
"[ 69, 81]Dependency(pobj,basic) D[69,81](constituents) G[61,63](as)",
"[ 82, 85]Dependency(cc,basic) D[82,85](and) G[69,81](constituents)",
"[ 86, 98]Dependency(conj,basic) D[86,98](dependencies) G[69,81](constituents)",
"[ 99,101]Dependency(dep,basic) D[99,101](as) G[61,63](as)",
"[102,110]Dependency(pobj,basic) D[102,110](possible) G[99,101](as)",
"[111,112]Dependency(punct,basic) D[111,112](.) G[3,7](need)" };
String[] depTags = { "<no-type>", "abbrev", "acomp", "advcl", "advmod",
"amod", "appos", "attr", "aux", "auxpass", "cc", "ccomp", "complm", "conj", "cop",
"csubj", "csubjpass", "dep", "det", "dobj", "expl", "infmod", "iobj", "mark",
"measure", "neg", "nn", "nsubj", "nsubjpass", "null", "num", "number", "parataxis",
"partmod", "pcomp", "pobj", "poss", "possessive", "preconj", "pred", "predet",
"prep", "prt", "punct", "purpcl", "quantmod", "rcmod", "rel", "tmod", "xcomp" };
String[] posTags = { "#", "$", "''", ",", "-LRB-", "-RRB-", ".", ":",
"<root-POS>", "CC", "CD", "DT", "EX", "FW", "IN", "JJ", "JJR", "JJS", "LS", "MD",
"NN", "NNP", "NNPS", "NNS", "PDT", "POS", "PRP", "PRP$", "RB", "RBR", "RBS", "RP",
"SYM", "TO", "UH", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$",
"WRB", "``" };
String[] unmappedPos = { "<root-POS>"};
AssertAnnotations.assertDependencies(dependencies, select(jcas, Dependency.class));
AssertAnnotations.assertTagset(POS.class, "ptb", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "ptb", unmappedPos, jcas);
AssertAnnotations.assertTagset(Dependency.class, "stanford", depTags, jcas);
}
/**
* This method runs the MSTParser for an example sentence and checks if it returns the correct
* annotations. An annotation consists of: dependency type, begin of dependency, end of
* dependency, begin of the head, end of the head
*
* @throws Exception
* if an error occurs.
*/
@Test
public void testEnglishSample()
throws Exception
{
JCas jcas = runTest("en", "sample", "We need a very complicated example sentence , which " +
"contains as many constituents and dependencies as possible .");
String[] dependencies = {
"[ 0, 2]Dependency(NP-SBJ,basic) D[0,2](We) G[3,7](need)",
"[ 3, 7]ROOT(ROOT,basic) D[3,7](need) G[3,7](need)",
"[ 8, 9]Dependency(DEP,basic) D[8,9](a) G[35,43](sentence)",
"[ 10, 14]Dependency(DEP,basic) D[10,14](very) G[15,26](complicated)",
"[ 15, 26]Dependency(DEP,basic) D[15,26](complicated) G[35,43](sentence)",
"[ 27, 34]Dependency(DEP,basic) D[27,34](example) G[35,43](sentence)",
"[ 35, 43]Dependency(NP-OBJ,basic) D[35,43](sentence) G[3,7](need)",
"[ 44, 45]Dependency(DEP,basic) D[44,45](,) G[3,7](need)",
"[ 46, 51]Dependency(SBAR,basic) D[46,51](which) G[3,7](need)",
"[ 52, 60]Dependency(S,basic) D[52,60](contains) G[46,51](which)",
"[ 61, 63]Dependency(PP,basic) D[61,63](as) G[52,60](contains)",
"[ 64, 68]Dependency(DEP,basic) D[64,68](many) G[69,81](constituents)",
"[ 69, 81]Dependency(NP,basic) D[69,81](constituents) G[61,63](as)",
"[ 82, 85]Dependency(DEP,basic) D[82,85](and) G[86,98](dependencies)",
"[ 86, 98]Dependency(NP,basic) D[86,98](dependencies) G[61,63](as)",
"[ 99,101]Dependency(PP,basic) D[99,101](as) G[86,98](dependencies)",
"[102,110]Dependency(ADJP,basic) D[102,110](possible) G[99,101](as)",
"[111,112]Dependency(DEP,basic) D[111,112](.) G[3,7](need)" };
String[] posTags = { "$", "''", ",", "-LRB-", "-RRB-", ".", ":", "<root-POS>",
"CC", "CD", "DT", "FW", "IN", "JJ", "JJR", "JJS", "MD", "NN", "NNP", "NNPS", "NNS",
"POS", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "TO", "VB", "VBD", "VBG", "VBN",
"VBP", "VBZ", "WDT", "WP", "WRB", "``" };
String[] unmappedPos = { "<root-POS>"};
String[] depTags = { "<no-type>", "ADJP", "ADVP", "CONJP", "DEP", "FRAG",
"NAC", "NP", "NP-OBJ", "NP-PRD", "NP-SBJ", "NX", "PP", "PRN", "PRT", "QP", "ROOT",
"S", "SBAR", "SINV", "SQ", "UCP", "VP", "WHNP" };
AssertAnnotations.assertDependencies(dependencies, select(jcas, Dependency.class));
AssertAnnotations.assertTagset(POS.class, "ptb", posTags, jcas);
AssertAnnotations.assertTagsetMapping(POS.class, "ptb", unmappedPos, jcas);
AssertAnnotations.assertTagset(Dependency.class, "conll2008", depTags, jcas);
}
private JCas runTest(String aLanguage, String aVariant, String aText, Object... aExtraParams)
throws Exception
{
AssumeResource.assumeResource(MstParser.class, "parser", aLanguage, aVariant);
AggregateBuilder aggregate = new AggregateBuilder();
aggregate.add(createEngineDescription(HunPosTagger.class));
Object[] params = new Object[] {
MstParser.PARAM_VARIANT, aVariant,
MstParser.PARAM_PRINT_TAGSET, true};
params = ArrayUtils.addAll(params, aExtraParams);
aggregate.add(createEngineDescription(MstParser.class, params));
return TestRunner.runTest(aggregate.createAggregateDescription(), aLanguage, aText);
}
@Rule
public DkproTestContext testContext = new DkproTestContext();
}