/*
* Copyright 2010
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.tokit;
import static java.util.Arrays.asList;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.apache.uima.fit.util.JCasUtil.toText;
import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.commons.jxpath.ClassFunctions;
import org.apache.commons.jxpath.DynamicPropertyHandler;
import org.apache.commons.jxpath.ExpressionContext;
import org.apache.commons.jxpath.JXPathContext;
import org.apache.commons.jxpath.JXPathIntrospector;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.factory.JCasBuilder;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.jcas.JCas;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.N;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.PR;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.PUNC;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.V;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
import de.tudarmstadt.ukp.dkpro.core.tokit.TokenMerger.LemmaMode;
public class TokenMergerTest
{
@Test
public void testSimpleMerge()
throws Exception
{
AnalysisEngine filter = createEngine(TokenMerger.class,
TokenMerger.PARAM_ANNOTATION_TYPE, NamedEntity.class);
JCas jcas = initCas();
filter.process(jcas);
assertEquals(asList("I", "love", "New York", "."), pick(select(jcas, Token.class), "cas:text()"));
}
@Test
public void testWithConstraintMatch()
throws Exception
{
AnalysisEngine filter = createEngine(TokenMerger.class,
TokenMerger.PARAM_ANNOTATION_TYPE, NamedEntity.class,
TokenMerger.PARAM_CONSTRAINT, ".[value = 'LOCATION']");
JCas jcas = initCas();
filter.process(jcas);
assertEquals(asList("I", "love", "New York", "."), toText(select(jcas, Token.class)));
}
@Test
public void testWithConstraintNoMatch()
throws Exception
{
AnalysisEngine filter = createEngine(TokenMerger.class,
TokenMerger.PARAM_ANNOTATION_TYPE, NamedEntity.class,
TokenMerger.PARAM_CONSTRAINT, ".[value = 'PERSON']");
JCas jcas = initCas();
filter.process(jcas);
assertEquals(asList("I", "love", "New", "York", "."), toText(select(jcas, Token.class)));
}
@Test
public void testSimpleMergeLemmaJoin()
throws Exception
{
AnalysisEngine filter = createEngine(TokenMerger.class,
TokenMerger.PARAM_ANNOTATION_TYPE, NamedEntity.class,
TokenMerger.PARAM_LEMMA_MODE, LemmaMode.JOIN);
JCas jcas = initCas();
filter.process(jcas);
assertEquals(asList("I", "love", "new york", "."), pick(select(jcas, Token.class), "./lemma/value"));
}
private JCas initCas() throws UIMAException
{
JCas jcas = JCasFactory.createJCas();
JCasBuilder builder = new JCasBuilder(jcas);
setLemmaPos(builder.add("I", Token.class), PR.class, "PR", "I");
builder.add(" ");
setLemmaPos(builder.add("love", Token.class), V.class, "V", "love");
builder.add(" ");
int m = setLemmaPos(builder.add("New", Token.class), N.class, "N", "new").getBegin();
builder.add(" ");
setLemmaPos(builder.add("York", Token.class), N.class, "N", "york");
NamedEntity city = builder.add(m, NamedEntity.class);
city.setValue("LOCATION");
setLemmaPos(builder.add(".", Token.class), PUNC.class, "PUNT", ".");
builder.close();
return builder.getJCas();
}
private Token setLemmaPos(Token aToken, Class<? extends POS> aPosType, String aPosValue,
String aLemma)
throws CASException
{
CAS cas = aToken.getCAS();
POS pos = (POS) cas.createAnnotation(CasUtil.getType(cas, aPosType), aToken.getBegin(),
aToken.getEnd());
pos.setPosValue(aPosValue);
pos.setCoarseValue(pos.getClass().equals(POS.class) ? null
: pos.getType().getShortName().intern());
aToken.setPos(pos);
Lemma lemma = new Lemma(aToken.getCAS().getJCas(), aToken.getBegin(), aToken.getEnd());
lemma.setValue(aLemma);
aToken.setLemma(lemma);
return aToken;
}
// =============================================================================================
// == JXPath helper methods
// =============================================================================================
{
JXPathIntrospector.registerDynamicClass(FeatureStructure.class, FeatureStructureHandler.class);
}
public static class FeatureStructureHandler implements DynamicPropertyHandler
{
@Override
public String[] getPropertyNames(Object aObject)
{
FeatureStructure fs = (FeatureStructure) aObject;
Type t = fs.getType();
List<Feature> features = t.getFeatures();
String[] featureNames = new String[features.size()];
int i = 0;
for (Feature f : features) {
featureNames[i] = f.getShortName();
i++;
}
return featureNames;
}
@Override
public Object getProperty(Object aObject, String aPropertyName)
{
FeatureStructure fs = (FeatureStructure) aObject;
Feature f = fs.getType().getFeatureByBaseName(aPropertyName);
if (CAS.TYPE_NAME_BOOLEAN.equals(f.getRange().getName())) {
return fs.getBooleanValue(f);
}
else if (CAS.TYPE_NAME_BYTE.equals(f.getRange().getName())) {
return fs.getByteValue(f);
}
else if (CAS.TYPE_NAME_DOUBLE.equals(f.getRange().getName())) {
return fs.getDoubleValue(f);
}
else if (CAS.TYPE_NAME_FLOAT.equals(f.getRange().getName())) {
return fs.getFloatValue(f);
}
else if (CAS.TYPE_NAME_INTEGER.equals(f.getRange().getName())) {
return fs.getIntValue(f);
}
else if (CAS.TYPE_NAME_LONG.equals(f.getRange().getName())) {
return fs.getLongValue(f);
}
else if (CAS.TYPE_NAME_SHORT.equals(f.getRange().getName())) {
return fs.getShortValue(f);
}
else if (CAS.TYPE_NAME_STRING.equals(f.getRange().getName())) {
return fs.getStringValue(f);
}
else {
return fs.getFeatureValue(f);
}
}
@Override
public void setProperty(Object aObject, String aPropertyName, Object aValue)
{
throw new UnsupportedOperationException();
}
}
@SuppressWarnings("unchecked")
public static List<Object> pick(Collection<?> aContext, String aPath)
{
List<Object> result = new ArrayList<Object>();
for (Object a : aContext) {
JXPathContext ctx = JXPathContext.newContext(a);
ctx.setFunctions(new ClassFunctions(JXPathCasFunctions.class, "cas"));
result.addAll(ctx.selectNodes(aPath));
}
return result;
}
public static class JXPathCasFunctions
{
public static String text(ExpressionContext aCtx)
{
Object value = aCtx.getContextNodePointer().getValue();
if (value instanceof AnnotationFS) {
return ((AnnotationFS) value).getCoveredText();
}
else {
return String.valueOf(value);
}
}
}
@Rule
public DkproTestContext testContext = new DkproTestContext();
}