/**
* Copyright 2007-2014
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package de.tudarmstadt.ukp.dkpro.core.sfst;
import static de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations.*;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.util.JCasUtil.select;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.jcas.JCas;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner;
public class SfstAnnotatorTest
{
/**
 * Analyses a short Turkish token sequence with the {@code trmorph-ca} variant and compares
 * the produced {@link MorphologicalFeatures} annotations, the {@code trmorph} tagset and the
 * tags handed to {@code assertTagsetParser} against the expectations below.
 *
 * @throws Exception
 *             if the engine cannot be created or the document cannot be processed.
 */
@Test
public void testTurkish()
    throws Exception
{
    // One entry per expected MorphologicalFeatures annotation, in the textual form that
    // assertMorph compares against.
    String[] expectedAnalyses = {
            "[ 0, 6] - - - - - - - - - - - - - - - - - Doktor ()",
            "[ 7, 14] - - - - - - - - - - - - - - - - - hastane (hastane<n>)",
            "[ 7, 14] - - - - - - - - Plur - 3 - - - - - - hastane (hastane<n><3p>)",
            "[ 7, 14] - - - - - - - - Sing - 3 - - - - - - hastane (hastane<n><3s>)",
            "[ 15, 20] - - - - - - - - - - - - - - - - - çalış (çal<v><vn_yis>)",
            "[ 15, 20] - - - - - - - - Plur - 3 - - - - - - çalış (çalış<v><t_imp><3p>)",
            "[ 15, 20] - - - - - - - - Sing - 2 - - - - - - çalış (çalış<v><t_imp><2s>)",
            "[ 15, 20] - - - - - - - - Plur - 3 - - - - - - çalış (çal<v><D_yIS><n><3p>)",
            "[ 15, 20] - - - - - - - - Sing - 3 - - - - - - çalış (çal<v><vn_yis><3s>)",
            "[ 15, 20] - - - - - - - - Sing - 3 - - - - - - çalış (çal<v><D_yIS><n><3s>)",
            "[ 15, 20] - - - - - - - - - - - - - - - - - çalış (çal<v><D_yIS><n>)",
            "[ 15, 20] - - - - - - - - Plur - 3 - - - - - - çalış (çal<v><vn_yis><3p>)",
            "[ 21, 22] - - - - - - - - - - - - - - - - - . (.<pnct>)" };

    // Complete tag inventory expected for the "trmorph" tagset.
    String[] expectedTagset = { "<1p>", "<1s>", "<2p>", "<2s>", "<3p>", "<3s>", "<D_AcIK>", "<D_CA>",
            "<D_CAK>", "<D_CAgIz>", "<D_CI>", "<D_CIK>", "<D_IcIK>", "<D_IncI>", "<D_ca>",
            "<D_cil>", "<D_gil>", "<D_lA>", "<D_lAn>", "<D_lAs>", "<D_lI>", "<D_lIK>",
            "<D_mA>", "<D_mAdIK>", "<D_sAl>", "<D_sAr>", "<D_sa>", "<D_siz>", "<D_yIS>",
            "<Dan_0>", "<Djn_0>", "<Dmn_0>", "<Dnn_siz>", "<Dvn_yIcI>", "<abil>", "<abl>",
            "<acc>", "<acr>", "<adj>", "<adur>", "<adv>", "<agel>", "<agor>", "<akal>",
            "<akoy>", "<ant>", "<apos>", "<ayaz>", "<ca>", "<caus>", "<cnjadv>", "<cnjcoo>",
            "<cnjsub>", "<cog>", "<cpl_di>", "<cpl_ken>", "<cpl_mis>", "<cpl_sa>", "<cv_acak>",
            "<cv_cesine>", "<cv_dan>", "<cv_dik>", "<cv_ecek>", "<cv_eli>", "<cv_erek>",
            "<cv_ince>", "<cv_ip>", "<cv_iyor>", "<cv_ma>", "<cv_mak>", "<cv_mis>",
            "<cv_zdan>", "<dat>", "<dem>", "<det>", "<dir>", "<exist>", "<gen>", "<ij>",
            "<ins>", "<iver>", "<ki>", "<loc>", "<locp>", "<n>", "<neg>", "<nexist>", "<not>",
            "<np>", "<num>", "<org>", "<p1p>", "<p1s>", "<p2p>", "<p2s>", "<p3p>", "<p3s>",
            "<part_acak>", "<part_dik>", "<part_yan>", "<pass>", "<pers>", "<pl>", "<pnct>",
            "<postp>", "<prn>", "<q>", "<qst>", "<rec>", "<ref>", "<t_aor>", "<t_cond>",
            "<t_cont>", "<t_fut>", "<t_imp>", "<t_makta>", "<t_narr>", "<t_obl>", "<t_opt>",
            "<t_past>", "<top>", "<v>", "<vinf>", "<vn_acak>", "<vn_dik>", "<vn_ma>",
            "<vn_mak>", "<vn_yis>" };

    // Tags expected by assertTagsetParser -- presumably those the mapping leaves
    // untranslated; confirm against AssertAnnotations.
    String[] expectedUnmapped = { "<D_AcIK>", "<D_CA>", "<D_CAK>", "<D_CAgIz>", "<D_CI>",
            "<D_CIK>", "<D_IcIK>", "<D_IncI>", "<D_ca>", "<D_cil>", "<D_gil>", "<D_lA>",
            "<D_lAn>", "<D_lAs>", "<D_lI>", "<D_lIK>", "<D_mA>", "<D_mAdIK>", "<D_sAl>",
            "<D_sAr>", "<D_sa>", "<D_siz>", "<D_yIS>", "<Dan_0>", "<Djn_0>", "<Dmn_0>",
            "<Dnn_siz>", "<Dvn_yIcI>", "<abil>", "<acr>", "<adj>", "<adur>", "<adv>", "<agel>",
            "<agor>", "<akal>", "<akoy>", "<ant>", "<apos>", "<ayaz>", "<ca>", "<caus>",
            "<cnjadv>", "<cnjcoo>", "<cnjsub>", "<cog>", "<cpl_di>", "<cpl_ken>", "<cpl_mis>",
            "<cpl_sa>", "<cv_acak>", "<cv_cesine>", "<cv_dan>", "<cv_dik>", "<cv_ecek>",
            "<cv_eli>", "<cv_erek>", "<cv_ince>", "<cv_ip>", "<cv_iyor>", "<cv_ma>",
            "<cv_mak>", "<cv_mis>", "<cv_zdan>", "<dem>", "<det>", "<dir>", "<exist>", "<ij>",
            "<iver>", "<ki>", "<locp>", "<n>", "<neg>", "<nexist>", "<not>", "<np>", "<num>",
            "<org>", "<p1p>", "<p1s>", "<p2p>", "<p2s>", "<p3p>", "<p3s>", "<part_acak>",
            "<part_dik>", "<part_yan>", "<pass>", "<pers>", "<pl>", "<pnct>", "<postp>",
            "<prn>", "<q>", "<qst>", "<rec>", "<ref>", "<t_aor>", "<t_cond>", "<t_cont>",
            "<t_fut>", "<t_imp>", "<t_makta>", "<t_narr>", "<t_obl>", "<t_opt>", "<t_past>",
            "<top>", "<v>", "<vinf>", "<vn_acak>", "<vn_dik>", "<vn_ma>", "<vn_mak>",
            "<vn_yis>" };

    JCas analysed = runTest("tr", "trmorph-ca", "Doktor hastane çalış .");

    assertMorph(expectedAnalyses, select(analysed, MorphologicalFeatures.class));
    assertTagset(MorphologicalFeatures.class, "trmorph", expectedTagset, analysed);
    assertTagsetParser(MorphologicalFeatures.class, "trmorph", expectedUnmapped, analysed);
}
/**
 * Analyses a German sentence with the {@code morphisto-ca} variant and compares the produced
 * {@link MorphologicalFeatures} annotations, the {@code morphisto} tagset and the tags handed
 * to {@code assertTagsetParser} against the expectations below.
 *
 * @throws Exception
 *             if the engine cannot be created or the document cannot be processed.
 */
@Test
public void testGermanMorphisto()
    throws Exception
{
    // One entry per expected MorphologicalFeatures annotation, in the textual form that
    // assertMorph compares against.
    String[] expectedAnalyses = {
            "[ 0, 3] - - Gen - - Fem - - Sing - - - - - - - - Der (<CAP>die<+ART><Def><Fem><Gen><Sg>)",
            "[ 0, 3] - - Gen - - - - - Plur - - - - - - - - Der (<CAP>die<+ART><Def><NoGend><Gen><Pl>)",
            "[ 0, 3] - - Gen - - Fem - - Sing - - - - - - - - Der (<CAP>die<+DEM><subst><Fem><Gen><Sg>)",
            "[ 0, 3] - - Nom - - Masc - - Sing - - - - - - - - Der (<CAP>der<+ART><Def><Masc><Nom><Sg>)",
            "[ 0, 3] - - Nom - - Masc - - Sing - - - - - - - - Der (<CAP>der<+DEM><subst><Masc><Nom><Sg>)",
            "[ 0, 3] - - Nom - - Masc - - Sing - - - - - - - - Der (<CAP>der<+REL><subst><Masc><Nom><Sg>)",
            "[ 0, 3] - - Dat - - Fem - - Sing - - - - - - - - Der (<CAP>die<+DEM><subst><Fem><Dat><Sg>)",
            "[ 0, 3] - - Dat - - Fem - - Sing - - - - - - - - Der (<CAP>die<+REL><subst><Fem><Dat><Sg>)",
            "[ 0, 3] - - Dat - - Fem - - Sing - - - - - - - - Der (<CAP>die<+ART><Def><Fem><Dat><Sg>)",
            "[ 4, 8] - - Nom - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Nom><Sg>)",
            "[ 4, 8] - - Acc - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Akk><Sg>)",
            "[ 4, 8] - - Dat - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Dat><Sg>)",
            "[ 9, 17] - - - - - - Sub - Plur - 2 - - - Pres - - arbeitet (arbeiten<+V><2><Pl><Pres><Konj>)",
            "[ 9, 17] - - - - - - Ind - Plur - 2 - - - Pres - - arbeitet (arbeiten<+V><2><Pl><Pres><Ind>)",
            "[ 9, 17] - - - - - - Ind - Sing - 3 - - - Pres - - arbeitet (arbeiten<+V><3><Sg><Pres><Ind>)",
            "[ 9, 17] - - - - - - - - Plur - - - - - Imp - - arbeitet (arbeiten<+V><Imp><Pl>)",
            "[ 18, 20] - - Dat - - Masc - - Sing - - - - - - - - im (im<+PREP/ART><Masc><Dat><Sg>)",
            "[ 18, 20] - - Dat - - Neut - - Sing - - - - - - - - im (im<+PREP/ART><Neut><Dat><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (Kranke<NN>Haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (kranken<V><NN><SUFF>Haus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (kranken<V><NN><SUFF>Haus<+NN><Neut><Akk><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (krank<ADJ><NN><SUFF>Haus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (krank<ADJ><NN><SUFF>Haus<+NN><Neut><Akk><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (krank<ADJ><NN><SUFF>Haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (kranken<V><NN><SUFF>Haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Akk><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (Kranke<NN>Haus<+NN><Neut><Akk><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (Kranke<NN>Haus<+NN><Neut><Dat><Sg>)",
            "[ 33, 34] - - - - - - - - - - - - - - - - - . (.<+IP><Norm>)" };

    // Complete tag inventory expected for the "morphisto" tagset.
    String[] expectedTagset = { "<+ADJ>", "<+ADV>", "<+ART>", "<+CARD>", "<+CHAR>", "<+CIRCP>", "<+DEM>",
            "<+DEMPRO>", "<+INDEF>", "<+INTJ>", "<+IP>", "<+KONJ>", "<+NE>", "<+NN>", "<+ORD>",
            "<+POSS>", "<+POSTP>", "<+PPRO>", "<+PREP/ART>", "<+PREP>", "<+PROADV>", "<+PTKL>",
            "<+REL>", "<+SYMBOL>", "<+TRUNC>", "<+V>", "<+VPRE>", "<+WADV>", "<+WPRO>", "<1>",
            "<2>", "<3>", "<ADJ>", "<ADV>", "<Adj>", "<Adv>", "<Akk>", "<Ant>", "<CAP>",
            "<CARD>", "<Comp>", "<DIGCARD>", "<Dat>", "<Def>", "<Fem>", "<Gen>", "<Imp>",
            "<Ind>", "<Indef>", "<Inf>", "<Invar>", "<Komma>", "<Kon>", "<Konj>", "<Masc>",
            "<NE>", "<NN>", "<Neg>", "<Neut>", "<NoGend>", "<Nom>", "<Norm>", "<ORD>",
            "<OTHER>", "<PPast>", "<PPres>", "<PREF>", "<Past>", "<Pl>", "<Pos>", "<Pred>",
            "<Pres>", "<ProAdv>", "<QUANT>", "<SUFF>", "<Sg>", "<St/Mix>", "<St>", "<Sub>",
            "<Sup>", "<Sw/Mix>", "<Sw>", "<UC>", "<V>", "<Vgl>", "<^ABK>", "<^VPAST>",
            "<^VPRES>", "<attr>", "<links>", "<mD>", "<oD>", "<pers>", "<prfl>", "<pro>",
            "<rechts>", "<refl>", "<rez>", "<subst>", "<zu>" };

    // Tags expected by assertTagsetParser -- presumably those the mapping leaves
    // untranslated; confirm against AssertAnnotations.
    String[] expectedUnmapped = { "<+ADJ>", "<+ADV>", "<+ART>", "<+CARD>", "<+CHAR>", "<+CIRCP>",
            "<+DEM>", "<+DEMPRO>", "<+INDEF>", "<+INTJ>", "<+IP>", "<+KONJ>", "<+NE>", "<+NN>",
            "<+ORD>", "<+POSS>", "<+POSTP>", "<+PPRO>", "<+PREP/ART>", "<+PREP>", "<+PROADV>",
            "<+PTKL>", "<+REL>", "<+SYMBOL>", "<+TRUNC>", "<+V>", "<+VPRE>", "<+WADV>",
            "<+WPRO>", "<ADJ>", "<ADV>", "<Adj>", "<Adv>", "<Ant>", "<CAP>", "<CARD>",
            "<Comp>", "<DIGCARD>", "<Def>", "<Indef>", "<Inf>", "<Invar>", "<Komma>", "<Kon>",
            "<NE>", "<NN>", "<Neg>", "<NoGend>", "<Norm>", "<ORD>", "<OTHER>", "<PPast>",
            "<PPres>", "<PREF>", "<Past>", "<Pos>", "<Pred>", "<ProAdv>", "<QUANT>", "<SUFF>",
            "<St/Mix>", "<St>", "<Sub>", "<Sup>", "<Sw/Mix>", "<Sw>", "<UC>", "<V>", "<Vgl>",
            "<^ABK>", "<^VPAST>", "<^VPRES>", "<attr>", "<links>", "<mD>", "<oD>", "<pers>",
            "<prfl>", "<pro>", "<rechts>", "<refl>", "<rez>", "<subst>", "<zu>" };

    JCas analysed = runTest("de", "morphisto-ca", "Der Arzt arbeitet im Krankenhaus .");

    assertMorph(expectedAnalyses, select(analysed, MorphologicalFeatures.class));
    assertTagset(MorphologicalFeatures.class, "morphisto", expectedTagset, analysed);
    assertTagsetParser(MorphologicalFeatures.class, "morphisto", expectedUnmapped, analysed);
}
/**
 * Analyses a German sentence with the {@code smor-ca} variant and compares the produced
 * {@link MorphologicalFeatures} annotations and the {@code smor} tagset against the
 * expectations below. The tag list handed to {@code assertTagsetParser} is empty.
 *
 * @throws Exception
 *             if the engine cannot be created or the document cannot be processed.
 */
@Test
public void testGermanSmor()
    throws Exception
{
    // One entry per expected MorphologicalFeatures annotation, in the textual form that
    // assertMorph compares against.
    String[] expectedAnalyses = {
            "[ 0, 3] - - - - - - - - - - - - - - - - - Der ()",
            "[ 4, 8] - - Nom - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Nom><Sg>)",
            "[ 4, 8] - - Acc - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Acc><Sg>)",
            "[ 4, 8] - - Dat - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Dat><Sg>)",
            "[ 9, 17] - - - - - - Sub - Plur - 2 - - - Pres - - arbeitet (arbeiten<+V><2><Pl><Pres><Subj>)",
            "[ 9, 17] - - - - - - Imp - Plur - - - - - - - - arbeitet (arbeiten<+V><Imp><Pl>)",
            "[ 9, 17] - - - - - - Ind - Sing - 3 - - - Pres - - arbeitet (arbeiten<+V><3><Sg><Pres><Ind>)",
            "[ 9, 17] - - - - - - Ind - Plur - 2 - - - Pres - - arbeitet (arbeiten<+V><2><Pl><Pres><Ind>)",
            "[ 18, 20] - - Dat - - Masc - - Sing - - - - - - - - im (in<+PREPART><Masc><Dat><Sg>)",
            "[ 18, 20] - - Dat - - Neut - - Sing - - - - - - - - im (in<+PREPART><Neut><Dat><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (kranken<V><NN><SUFF>Haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (krank<ADJ><NN><SUFF>Haus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (krank<ADJ><NN><SUFF>Haus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (kranken<V><NN><SUFF>Haus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (krank<ADJ><NN><SUFF>Haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (kranken<V><NN><SUFF>Haus<+NN><Neut><Dat><Sg>)",
            "[ 33, 34] - - - - - - - - - - - - - - - - - . (.<+PUNCT><Norm>)" };

    // Complete tag inventory expected for the "smor" tagset.
    String[] expectedTagset = { "<+ADJ>", "<+ADV>", "<+ART>", "<+CARD>", "<+CIRCP>", "<+CONJ>", "<+DEM>",
            "<+INDEF>", "<+INTJ>", "<+NN>", "<+NPROP>", "<+ORD>", "<+POSS>", "<+POSTP>",
            "<+PPRO>", "<+PREP>", "<+PREPART>", "<+PROADV>", "<+PTCL>", "<+PUNCT>", "<+REL>",
            "<+SYMBOL>", "<+TRUNC>", "<+V>", "<+VPART>", "<+WADV>", "<+WPRO>", "<1>", "<2>",
            "<3>", "<ADJ>", "<ADV>", "<Acc>", "<Adj>", "<Adv>", "<Ans>", "<Attr>", "<CAP>",
            "<CARD>", "<Comma>", "<Comp>", "<Compar>", "<Coord>", "<Dat>", "<Def>", "<F>",
            "<Fem>", "<Ge-Nom>", "<Gen>", "<Imp>", "<Ind>", "<Indef>", "<Inf>", "<Invar>",
            "<KSF>", "<Left>", "<Masc>", "<NEWORTH>", "<NN>", "<NPROP>", "<Neg>", "<Neut>",
            "<NoGend>", "<Nom>", "<Norm>", "<OLDORTH>", "<ORD>", "<Old>", "<PPast>", "<PPres>",
            "<PREF>", "<Past>", "<Pers>", "<Pl>", "<Pos>", "<Pred>", "<Pres>", "<Pro>",
            "<Rec>", "<Refl>", "<Right>", "<SUFF>", "<Sg>", "<Simp>", "<St>", "<Sub>",
            "<Subj>", "<Subst>", "<Sup>", "<TRUNC>", "<V>", "<VPART>", "<VPREF>", "<Wk>",
            "<^ABBR>", "<zu>" };

    // No tags are passed to assertTagsetParser for this model.
    String[] expectedUnmapped = {};

    JCas analysed = runTest("de", "smor-ca", "Der Arzt arbeitet im Krankenhaus .");

    assertMorph(expectedAnalyses, select(analysed, MorphologicalFeatures.class));
    assertTagset(MorphologicalFeatures.class, "smor", expectedTagset, analysed);
    assertTagsetParser(MorphologicalFeatures.class, "smor", expectedUnmapped, analysed);
}
/**
 * Analyses a German sentence with the {@code zmorge-orig-ca} variant and compares the
 * produced {@link MorphologicalFeatures} annotations and the tagset against the
 * expectations below. The tag list handed to {@code assertTagsetParser} is empty.
 *
 * <p>
 * NOTE(review): the tagset is looked up under the name {@code "smor"} rather than a
 * zmorge-specific name -- presumably because this model uses the SMOR tag inventory;
 * confirm.
 *
 * @throws Exception
 *             if the engine cannot be created or the document cannot be processed.
 */
@Test
public void testGermanZmorgeOrig()
    throws Exception
{
    // One entry per expected MorphologicalFeatures annotation, in the textual form that
    // assertMorph compares against.
    String[] expectedAnalyses = {
            "[ 0, 3] - - Dat - - Fem - - Sing - - - Rel - - - - Der (<CAP>die<+REL><Subst><Fem><Dat><Sg><St>)",
            "[ 0, 3] - - Dat - - Fem - - Sing - - - - - - - - Der (<CAP>die<+DEM><Subst><Fem><Dat><Sg><St>)",
            "[ 0, 3] - - Gen Def - - - - Plur - - - - - - - - Der (<CAP>die<+ART><Def><NoGend><Gen><Pl><St>)",
            "[ 0, 3] - - Gen Def - Fem - - Sing - - - - - - - - Der (<CAP>die<+ART><Def><Fem><Gen><Sg><St>)",
            "[ 0, 3] - - Nom Def - Masc - - Sing - - - - - - - - Der (<CAP>die<+ART><Def><Masc><Nom><Sg><St>)",
            "[ 0, 3] - - Dat Def - Fem - - Sing - - - - - - - - Der (<CAP>die<+ART><Def><Fem><Dat><Sg><St>)",
            "[ 0, 3] - - Nom - - Masc - - Sing - - - - - - - - Der (<CAP>die<+DEM><Subst><Masc><Nom><Sg><St>)",
            "[ 0, 3] - - Nom - - Masc - - Sing - - - Rel - - - - Der (<CAP>die<+REL><Subst><Masc><Nom><Sg><St>)",
            "[ 4, 8] - - Acc - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Acc><Sg>)",
            "[ 4, 8] - - Nom - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Nom><Sg>)",
            "[ 4, 8] - - Dat - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Dat><Sg>)",
            "[ 9, 17] - - - - - - Sub - Plur - 2 - - - Pres - - arbeitet (arbeiten<+V><2><Pl><Pres><Subj>)",
            "[ 9, 17] - - - - - - Ind - Sing - 3 - - - Pres - - arbeitet (arbeiten<+V><3><Sg><Pres><Ind>)",
            "[ 9, 17] - - - - - - Ind - Plur - 2 - - - Pres - - arbeitet (arbeiten<+V><2><Pl><Pres><Ind>)",
            "[ 9, 17] - - - - - - Imp - Plur - - - - - - - - arbeitet (arbeiten<+V><Imp><Pl>)",
            "[ 18, 20] - - Dat - - Neut - - Sing - - - - - - - - im (in<+PREPART><Neut><Dat><Sg>)",
            "[ 18, 20] - - Dat - - Masc - - Sing - - - - - - - - im (in<+PREPART><Masc><Dat><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (krank<ADJ><NN><SUFF>Haus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (Kran<NN>Ken<NN>Haus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (Kran<NN>Ken<NN>Haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (Kran<NN>Ken<NN>Haus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (kranken<V><NN><SUFF>Haus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (kranken<V><NN><SUFF>Haus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (kranken<V><NN><SUFF>Haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (krank<ADJ><NN><SUFF>Haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (krank<ADJ><NN><SUFF>Haus<+NN><Neut><Dat><Sg>)",
            "[ 33, 34] - - Acc - - Fem - - Sing - - - - - - - - . (.<^ABBR><+NN><Fem><Acc><Sg>)",
            "[ 33, 34] - - Nom - - Fem - - Sing - - - - - - - - . (.<^ABBR><+NN><Fem><Nom><Sg>)",
            "[ 33, 34] - - - - - - - - - - - - - - - - - . (.<+PUNCT><Norm>)",
            "[ 33, 34] - - Gen - - Fem - - Sing - - - - - - - - . (.<^ABBR><+NN><Fem><Gen><Sg>)",
            "[ 33, 34] - - Dat - - Fem - - Sing - - - - - - - - . (.<^ABBR><+NN><Fem><Dat><Sg>)" };

    // Complete tag inventory expected for the tagset registered as "smor".
    String[] expectedTagset = { "<+ADJ>", "<+ADV>", "<+ART>", "<+CARD>", "<+CONJ>", "<+DEM>", "<+INDEF>",
            "<+INTJ>", "<+NN>", "<+NPROP>", "<+ORD>", "<+POSS>", "<+POSTP>", "<+PPRO>",
            "<+PREP>", "<+PREPART>", "<+PROADV>", "<+PTCL>", "<+PUNCT>", "<+REL>", "<+SYMBOL>",
            "<+TRUNC>", "<+V>", "<+VPART>", "<+WADV>", "<+WPRO>", "<1>", "<2>", "<3>", "<ADJ>",
            "<ADV>", "<Acc>", "<Adv>", "<Ans>", "<Attr>", "<CAP>", "<CARD>", "<Comma>",
            "<Comp>", "<Compar>", "<Coord>", "<Dat>", "<Def>", "<F>", "<Fem>", "<GUESSER>",
            "<Ge-Nom>", "<Gen>", "<Imp>", "<Ind>", "<Indef>", "<Inf>", "<Invar>", "<Left>",
            "<Masc>", "<NEWORTH>", "<NN>", "<NPROP>", "<Neg>", "<Neut>", "<NoGend>", "<Nom>",
            "<Norm>", "<OLDORTH>", "<ORD>", "<Old>", "<PPast>", "<PPres>", "<PREF>", "<Past>",
            "<Pers>", "<Pl>", "<Pos>", "<Pred>", "<Pres>", "<Pro>", "<Rec>", "<Refl>",
            "<Right>", "<SUFF>", "<Sg>", "<Simp>", "<St>", "<Sub>", "<Subj>", "<Subst>",
            "<Sup>", "<TRUNC>", "<V>", "<VPART>", "<VPREF>", "<Wk>", "<^ABBR>", "<zu>" };

    // No tags are passed to assertTagsetParser for this model.
    String[] expectedUnmapped = {};

    JCas analysed = runTest("de", "zmorge-orig-ca", "Der Arzt arbeitet im Krankenhaus .");

    assertMorph(expectedAnalyses, select(analysed, MorphologicalFeatures.class));
    assertTagset(MorphologicalFeatures.class, "smor", expectedTagset, analysed);
    assertTagsetParser(MorphologicalFeatures.class, "smor", expectedUnmapped, analysed);
}
/**
 * Analyses a German sentence with the {@code zmorge-newlemma-ca} variant and compares the
 * produced {@link MorphologicalFeatures} annotations, the tagset and the tags handed to
 * {@code assertTagsetParser} against the expectations below.
 *
 * <p>
 * NOTE(review): the tagset is looked up under the name {@code "smor"} rather than a
 * zmorge-specific name -- presumably because this model uses the SMOR tag inventory;
 * confirm.
 *
 * @throws Exception
 *             if the engine cannot be created or the document cannot be processed.
 */
@Test
public void testGermanZmorgeNewlemma()
    throws Exception
{
    // One entry per expected MorphologicalFeatures annotation, in the textual form that
    // assertMorph compares against. Some entries are intentionally listed twice.
    String[] expectedAnalyses = {
            "[ 0, 3] - - Dat - - Fem - - Sing - - - Rel - - - - Der (<CAP>die<+REL><Subst><Fem><Dat><Sg><St>)",
            "[ 0, 3] - - Dat - - Fem - - Sing - - - - - - - - Der (<CAP>die<+DEM><Subst><Fem><Dat><Sg><St>)",
            "[ 0, 3] - - Gen Def - - - - Plur - - - - - - - - Der (<CAP>die<+ART><Def><NoGend><Gen><Pl><St>)",
            "[ 0, 3] - - Gen Def - Fem - - Sing - - - - - - - - Der (<CAP>die<+ART><Def><Fem><Gen><Sg><St>)",
            "[ 0, 3] - - Nom Def - Masc - - Sing - - - - - - - - Der (<CAP>die<+ART><Def><Masc><Nom><Sg><St>)",
            "[ 0, 3] - - Dat Def - Fem - - Sing - - - - - - - - Der (<CAP>die<+ART><Def><Fem><Dat><Sg><St>)",
            "[ 0, 3] - - Nom - - Masc - - Sing - - - - - - - - Der (<CAP>die<+DEM><Subst><Masc><Nom><Sg><St>)",
            "[ 0, 3] - - Nom - - Masc - - Sing - - - Rel - - - - Der (<CAP>die<+REL><Subst><Masc><Nom><Sg><St>)",
            "[ 4, 8] - - Acc - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Acc><Sg>)",
            "[ 4, 8] - - Nom - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Nom><Sg>)",
            "[ 4, 8] - - Dat - - Masc - - Sing - - - - - - - - Arzt (Arzt<+NN><Masc><Dat><Sg>)",
            "[ 9, 17] - - - - - - Sub - Plur - 2 - - - Pres - - arbeitet (arbeit<~>en<+V><2><Pl><Pres><Subj>)",
            "[ 9, 17] - - - - - - Ind - Sing - 3 - - - Pres - - arbeitet (arbeit<~>en<+V><3><Sg><Pres><Ind>)",
            "[ 9, 17] - - - - - - Ind - Plur - 2 - - - Pres - - arbeitet (arbeit<~>en<+V><2><Pl><Pres><Ind>)",
            "[ 9, 17] - - - - - - Imp - Plur - - - - - - - - arbeitet (arbeit<~>en<+V><Imp><Pl>)",
            "[ 18, 20] - - Dat - - Neut - - Sing - - - - - - - - im (in<+PREPART><Neut><Dat><Sg>)",
            "[ 18, 20] - - Dat - - Masc - - Sing - - - - - - - - im (in<+PREPART><Masc><Dat><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (Kran<#>ken<#>haus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (Krank<~>en<#>haus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (Krank<~>en<#>haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (Krankenhaus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (Krank<~>en<#>haus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Acc - - Neut - - Sing - - - - - - - - Krankenhaus (Krank<~>en<#>haus<+NN><Neut><Acc><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (Krank<~>en<#>haus<+NN><Neut><Dat><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (Krank<~>en<#>haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Nom - - Neut - - Sing - - - - - - - - Krankenhaus (Kran<#>ken<#>haus<+NN><Neut><Nom><Sg>)",
            "[ 21, 32] - - Dat - - Neut - - Sing - - - - - - - - Krankenhaus (Kran<#>ken<#>haus<+NN><Neut><Dat><Sg>)",
            "[ 33, 34] - - Acc - - Fem - - Sing - - - - - - - - . (.<+NN><Fem><Acc><Sg>)",
            "[ 33, 34] - - Nom - - Fem - - Sing - - - - - - - - . (.<+NN><Fem><Nom><Sg>)",
            "[ 33, 34] - - - - - - - - - - - - - - - - - . (.<+PUNCT><Norm>)",
            "[ 33, 34] - - Gen - - Fem - - Sing - - - - - - - - . (.<+NN><Fem><Gen><Sg>)",
            "[ 33, 34] - - Dat - - Fem - - Sing - - - - - - - - . (.<+NN><Fem><Dat><Sg>)" };

    // Complete tag inventory expected for the tagset registered as "smor".
    String[] expectedTagset = { "<#>", "<+ADJ>", "<+ADV>", "<+ART>", "<+CARD>", "<+CONJ>", "<+DEM>",
            "<+INDEF>", "<+INTJ>", "<+NN>", "<+NPROP>", "<+ORD>", "<+POSS>", "<+POSTP>",
            "<+PPRO>", "<+PREP>", "<+PREPART>", "<+PROADV>", "<+PTCL>", "<+PUNCT>", "<+REL>",
            "<+SYMBOL>", "<+TRUNC>", "<+V>", "<+VPART>", "<+WADV>", "<+WPRO>", "<->", "<1>",
            "<2>", "<3>", "<Acc>", "<Adv>", "<Ans>", "<Attr>", "<CAP>", "<Comma>", "<Comp>",
            "<Compar>", "<Coord>", "<Dat>", "<Def>", "<Fem>", "<GUESSER>", "<Gen>", "<Imp>",
            "<Ind>", "<Indef>", "<Inf>", "<Invar>", "<Left>", "<Masc>", "<NEWORTH>", "<Neg>",
            "<Neut>", "<NoGend>", "<Nom>", "<Norm>", "<OLDORTH>", "<Old>", "<PPast>",
            "<PPres>", "<Past>", "<Pers>", "<Pl>", "<Pos>", "<Pred>", "<Pres>", "<Pro>",
            "<Rec>", "<Refl>", "<Right>", "<SUFF>", "<Sg>", "<Simp>", "<St>", "<Sub>",
            "<Subj>", "<Subst>", "<Sup>", "<TRUNC>", "<V>", "<Wk>", "<^ABBR>", "<zu>", "<~>" };

    // Tags expected by assertTagsetParser -- presumably those the mapping leaves
    // untranslated; confirm against AssertAnnotations.
    String[] expectedUnmapped = { "<#>", "<->", "<~>" };

    JCas analysed = runTest("de", "zmorge-newlemma-ca", "Der Arzt arbeitet im Krankenhaus .");

    assertMorph(expectedAnalyses, select(analysed, MorphologicalFeatures.class));
    assertTagset(MorphologicalFeatures.class, "smor", expectedTagset, analysed);
    assertTagsetParser(MorphologicalFeatures.class, "smor", expectedUnmapped, analysed);
}
/**
 * Analyses an Italian sentence with the {@code pippi-ca} variant and compares the produced
 * {@link MorphologicalFeatures} annotations, the {@code pippi} tagset and the tags handed to
 * {@code assertTagsetParser} against the expectations below.
 *
 * @throws Exception
 *             if the engine cannot be created or the document cannot be processed.
 */
@Test
public void testItalian()
    throws Exception
{
    // One entry per expected MorphologicalFeatures annotation, in the textual form that
    // assertMorph compares against.
    String[] expectedAnalyses = {
            "[ 0, 2] - - - - - - - - - - - - - - - - - Il ()",
            "[ 3, 9] - - - - - Masc - - Sing - - - - - - - - medico (medico<ADJ><pos><m><s>)",
            "[ 3, 9] - - - - - - Ind - Sing - 1 - - - Pres - - medico (medicare<VER><ind><pres><1><s>)",
            "[ 3, 9] - - - - - - - - Sing - - - - - - - - medico (medico<NOUN><M><s>)",
            "[ 10, 13] - - - - - - - - - - - - - - - - - che (che<CON>)",
            "[ 10, 13] - - - - - - - - - - - - - - - - - che (che<WH><CHE>)",
            "[ 10, 13] - - - - - Fem - - Sing - - - - - - - - che (che<DET><WH><f><s>)",
            "[ 10, 13] - - - - - Masc - - Sing - - - - - - - - che (che<DET><WH><m><s>)",
            "[ 10, 13] - - - - - Masc - - Plur - - - - - - - - che (che<DET><WH><m><p>)",
            "[ 10, 13] - - - - - Fem - - Plur - - - - - - - - che (che<DET><WH><f><p>)",
            "[ 14, 20] - - - - - - - - Sing - 2 - - - Pres - - lavora (lavorare<VER><impr><pres><2><s>)",
            "[ 14, 20] - - - - - - Ind - Sing - 3 - - - Pres - - lavora (lavorare<VER><ind><pres><3><s>)",
            "[ 21, 23] - - - - - - - - - - - - - - - - - in (in<PRE>)",
            "[ 24, 32] - - - - - - - - Sing - - - - - - - - ospedale (ospedale<NOUN><M><s>)",
            "[ 33, 34] - - - - - - - - - - - - - - - - - . (.<SENT>)" };

    // Complete tag inventory expected for the "pippi" tagset.
    String[] expectedTagset = { "<1>", "<2>", "<3>", "<ABL>", "<ADJ>", "<ADV>", "<ART>", "<ARTPRE>",
            "<ASP>", "<AUX>", "<CARD>", "<CAU>", "<CE>", "<CHE>", "<CI>", "<CLI>", "<COM>",
            "<CON>", "<DEMO>", "<DET>", "<F>", "<INDEF>", "<INT>", "<M>", "<MOD>", "<NE>",
            "<NOUN>", "<NUM>", "<P>", "<PERS>", "<PON>", "<POSS>", "<PRE>", "<PRO>", "<S>",
            "<SENT>", "<SI>", "<TALE>", "<VER>", "<WH>", "<cela>", "<cele>", "<celi>",
            "<celo>", "<cene>", "<ci>", "<comp>", "<cond>", "<f>", "<fut>", "<ger>", "<gli>",
            "<gliela>", "<gliele>", "<glieli>", "<glielo>", "<gliene>", "<impf>", "<impr>",
            "<ind>", "<inf>", "<la>", "<le>", "<li>", "<lo>", "<m>", "<mela>", "<mele>",
            "<meli>", "<melo>", "<mene>", "<mi>", "<ne>", "<p>", "<part>", "<past>", "<pos>",
            "<pres>", "<s>", "<sela>", "<sele>", "<seli>", "<selo>", "<sene>", "<si>", "<sub>",
            "<sup>", "<tela>", "<tele>", "<teli>", "<telo>", "<tene>", "<ti>", "<vela>",
            "<vele>", "<veli>", "<velo>", "<vene>", "<vi>" };

    // Tags expected by assertTagsetParser -- presumably those the mapping leaves
    // untranslated; confirm against AssertAnnotations.
    String[] expectedUnmapped = { "<ABL>", "<ADJ>", "<ADV>", "<ART>", "<ARTPRE>", "<ASP>", "<AUX>",
            "<CARD>", "<CAU>", "<CE>", "<CHE>", "<CI>", "<CLI>", "<COM>", "<CON>", "<DEMO>",
            "<DET>", "<F>", "<INT>", "<M>", "<MOD>", "<NE>", "<NOUN>", "<NUM>", "<P>",
            "<PERS>", "<PON>", "<POSS>", "<PRE>", "<PRO>", "<S>", "<SENT>", "<SI>", "<TALE>",
            "<VER>", "<WH>", "<cela>", "<cele>", "<celi>", "<celo>", "<cene>", "<ci>",
            "<comp>", "<cond>", "<gli>", "<gliela>", "<gliele>", "<glieli>", "<glielo>",
            "<gliene>", "<impr>", "<la>", "<le>", "<li>", "<lo>", "<mela>", "<mele>", "<meli>",
            "<melo>", "<mene>", "<mi>", "<ne>", "<part>", "<pos>", "<sela>", "<sele>",
            "<seli>", "<selo>", "<sene>", "<si>", "<sub>", "<sup>", "<tela>", "<tele>",
            "<teli>", "<telo>", "<tene>", "<ti>", "<vela>", "<vele>", "<veli>", "<velo>",
            "<vene>", "<vi>" };

    JCas analysed = runTest("it", "pippi-ca", "Il medico che lavora in ospedale .");

    assertMorph(expectedAnalyses, select(analysed, MorphologicalFeatures.class));
    assertTagset(MorphologicalFeatures.class, "pippi", expectedTagset, analysed);
    assertTagsetParser(MorphologicalFeatures.class, "pippi", expectedUnmapped, analysed);
}
/**
 * Builds an {@link SfstAnnotator} engine for the given model variant, with mode
 * {@code Mode.ALL} and tagset printing enabled, and runs it over the test document via
 * {@link TestRunner}.
 *
 * @param language
 *            language code handed to the test runner for the document.
 * @param variant
 *            value for {@code SfstAnnotator.PARAM_VARIANT}.
 * @param testDocument
 *            document text handed to the test runner.
 * @return the CAS returned by the test runner.
 * @throws Exception
 *             if the engine cannot be created or processing fails.
 */
private JCas runTest(String language, String variant, String testDocument)
    throws Exception
{
    AnalysisEngine sfst = createEngine(SfstAnnotator.class,
            SfstAnnotator.PARAM_VARIANT, variant,
            SfstAnnotator.PARAM_MODE, SfstAnnotator.Mode.ALL,
            SfstAnnotator.PARAM_PRINT_TAGSET, true);

    return TestRunner.runTest(sfst, language, testDocument);
}
// JUnit rule instance for this test class.
// NOTE(review): what the context provides to each test is defined in DkproTestContext,
// which is not visible here -- confirm there before relying on it.
@Rule
public DkproTestContext testContext = new DkproTestContext();
}