/*
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.lbj;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.util.JCasUtil.select;
import static de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations.*;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.jcas.JCas;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
public class IllinoisChunkerTest
{
@Test
public void testEnglish()
throws Exception
{
JCas jcas = runTest("en", null, "We need a very complicated example sentence, which " +
"contains as many constituents and dependencies as possible.");
String[] chunks = {
"[ 0, 2]NC(NP) (We)",
"[ 3, 7]VC(VP) (need)",
"[ 8, 43]NC(NP) (a very complicated example sentence)",
"[ 45, 50]NC(NP) (which)",
"[ 51, 59]VC(VP) (contains)",
"[ 60, 62]PC(PP) (as)",
"[ 63, 97]NC(NP) (many constituents and dependencies)",
"[ 98,100]PC(PP) (as)",
"[101,109]ADJC(ADJP) (possible)" };
String[] chunkTags = { "ADJP", "ADVP", "CONJP", "INTJ", "LST", "NP", "PP", "PRT", "SBAR",
"UCP", "VP" };
String[] unmappedChunk = {};
assertChunks(chunks, select(jcas, Chunk.class));
assertTagset(Chunk.class, "conll2000", chunkTags, jcas);
assertTagsetMapping(Chunk.class, "conll2000", unmappedChunk, jcas);
}
private JCas runTest(String aLanguage, String aVariant, String aText)
throws Exception
{
AnalysisEngineDescription segmenter = createEngineDescription(IllinoisSegmenter.class);
AnalysisEngineDescription tagger = createEngineDescription(IllinoisPosTagger.class);
AnalysisEngineDescription chunker = createEngineDescription(IllinoisChunker.class,
//IllinoisChunker.PARAM_VARIANT, aVariant,
IllinoisChunker.PARAM_PRINT_TAGSET, true);
AnalysisEngineDescription aggregate = createEngineDescription(segmenter, tagger, chunker);
AnalysisEngine engine = createEngine(aggregate);
JCas jcas = engine.newJCas();
jcas.setDocumentLanguage(aLanguage);
jcas.setDocumentText(aText);
engine.process(jcas);
return jcas;
}
@Rule
public DkproTestContext testContext = new DkproTestContext();
}