/*
* Copyright 2012
* Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter;
import static de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getAddr;
import static de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getFeature;
import static de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.isSameSentence;
import static de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.selectByAddr;
import static de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.selectOverlapping;
import static de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.setFeature;
import static org.apache.uima.fit.util.CasUtil.getType;
import static org.apache.uima.fit.util.CasUtil.selectCovered;
import static org.apache.uima.fit.util.JCasUtil.selectCovered;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.jcas.JCas;
import de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException;
import de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.MultipleSentenceCoveredException;
import de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.VID;
import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature;
import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
/**
* A class that is used to create Brat Span to CAS and vice-versa.
*/
public class SpanAdapter
implements TypeAdapter, AutomationTypeAdapter
{
/**
* The minimum offset of the annotation is on token, and the annotation can't span multiple
* tokens too
*/
private boolean lockToTokenOffsets;
/**
* The minimum offset of the annotation is on token, and the annotation can span multiple token
* too
*/
private boolean allowMultipleToken;
/**
* Allow multiple annotations of the same layer (only when the type value is different)
*/
private boolean allowStacking;
private boolean crossMultipleSentence;
private boolean deletable;
private AnnotationLayer layer;
private Map<String, AnnotationFeature> features;
// value NILL for a token when the training file do not have annotations provided
private final static String NILL = "__nill__";
public SpanAdapter(AnnotationLayer aLayer, Collection<AnnotationFeature> aFeatures)
{
layer = aLayer;
// Using a sorted map here so we have reliable positions in the map when iterating. We use
// these positions to remember the armed slots!
features = new TreeMap<String, AnnotationFeature>();
for (AnnotationFeature f : aFeatures) {
features.put(f.getName(), f);
}
}
/**
* Span can only be made on a single token (not multiple tokens), e.g. for POS or Lemma
* annotations. If this is set and a span is made across multiple tokens, then one annotation of
* the specified type will be created for each token. If this is not set, a single annotation
* covering all tokens is created.
*
* @param aSingleTokenBehavior
* whether to enable the behavior.
*/
public void setLockToTokenOffsets(boolean aSingleTokenBehavior)
{
lockToTokenOffsets = aSingleTokenBehavior;
}
/**
* @return whether the behavior is enabled.
* @see #setLockToTokenOffsets(boolean)
*/
public boolean isLockToTokenOffsets()
{
return lockToTokenOffsets;
}
public boolean isAllowMultipleToken()
{
return allowMultipleToken;
}
public void setAllowMultipleToken(boolean allowMultipleToken)
{
this.allowMultipleToken = allowMultipleToken;
}
public boolean isAllowStacking()
{
return allowStacking;
}
public void setAllowStacking(boolean allowStacking)
{
this.allowStacking = allowStacking;
}
public boolean isCrossMultipleSentence()
{
return crossMultipleSentence;
}
public void setCrossMultipleSentence(boolean crossMultipleSentence)
{
this.crossMultipleSentence = crossMultipleSentence;
}
/**
* Add new span annotation into the CAS and return the the id of the span annotation
*
* @param aJcas
* the JCas.
* @param aBegin
* the begin offset.
* @param aEnd
* the end offset.
* @param aFeature
* the feature.
* @param aValue
* the value of the annotation for the span
* @return the ID.
* @throws AnnotationException
* if the annotation cannot be created/updated.
*/
public Integer add(JCas aJcas, int aBegin, int aEnd, AnnotationFeature aFeature, Object aValue)
throws AnnotationException
{
// if zero-offset annotation is requested
if (aBegin == aEnd) {
return updateCas(aJcas.getCas(), aBegin, aEnd, aFeature, aValue);
}
if (crossMultipleSentence || isSameSentence(aJcas, aBegin, aEnd)) {
if (lockToTokenOffsets) {
List<Token> tokens = selectOverlapping(aJcas, Token.class, aBegin, aEnd);
if (tokens.isEmpty()) {
throw new AnnotationException("No token is found to annotate");
}
return updateCas(aJcas.getCas(), tokens.get(0).getBegin(), tokens.get(0).getEnd(),
aFeature, aValue);
}
else if (allowMultipleToken) {
List<Token> tokens = selectOverlapping(aJcas, Token.class, aBegin, aEnd);
// update the begin and ends (no sub token selection
aBegin = tokens.get(0).getBegin();
aEnd = tokens.get(tokens.size() - 1).getEnd();
return updateCas(aJcas.getCas(), aBegin, aEnd, aFeature, aValue);
}
else {
return updateCas(aJcas.getCas(), aBegin, aEnd, aFeature, aValue);
}
}
else {
throw new MultipleSentenceCoveredException("Annotation coveres multiple sentences, "
+ "limit your annotation to single sentence!");
}
}
// get feature Value of existing span annotation
public Serializable getSpan(JCas aJCas, int aBegin, int aEnd, AnnotationFeature aFeature,
String aLabelValue)
{
if(allowStacking){
return null;
}
int begin;
int end;
// update the begin and ends (no sub token selection)
if (lockToTokenOffsets) {
List<Token> tokens = selectOverlapping(aJCas, Token.class, aBegin, aEnd);
begin = tokens.get(0).getBegin();
end = tokens.get(tokens.size() - 1).getEnd();
}
else if (allowMultipleToken) {
List<Token> tokens = selectOverlapping(aJCas, Token.class, aBegin, aEnd);
begin = tokens.get(0).getBegin();
end = tokens.get(tokens.size() - 1).getEnd();
}
else {
begin = aBegin;
end = aEnd;
}
Type type = CasUtil.getType(aJCas.getCas(), getAnnotationTypeName());
for (AnnotationFS fs : CasUtil.selectCovered(aJCas.getCas(), type, begin, end)) {
if (fs.getBegin() == aBegin && fs.getEnd() == aEnd) {
return getFeatureValue(fs, aFeature);
}
}
return null;
}
public static Serializable getFeatureValue(FeatureStructure aFs, AnnotationFeature aFeature)
{
Feature uimaFeature = aFs.getType().getFeatureByBaseName(aFeature.getName());
switch (aFeature.getType()) {
case CAS.TYPE_NAME_STRING:
return aFs.getFeatureValueAsString(uimaFeature);
case CAS.TYPE_NAME_BOOLEAN:
return aFs.getBooleanValue(uimaFeature);
case CAS.TYPE_NAME_FLOAT:
return aFs.getFloatValue(uimaFeature);
case CAS.TYPE_NAME_INTEGER:
return aFs.getIntValue(uimaFeature);
default:
return aFs.getFeatureValueAsString(uimaFeature);
}
}
/**
* A Helper method to add annotation to CAS
*/
private Integer updateCas(CAS aCas, int aBegin, int aEnd, AnnotationFeature aFeature,
Object aValue)
throws AnnotationException
{
Type type = CasUtil.getType(aCas, getAnnotationTypeName());
for (AnnotationFS fs : CasUtil.selectCovered(aCas, type, aBegin, aEnd)) {
if (fs.getBegin() == aBegin && fs.getEnd() == aEnd) {
if (!allowStacking) {
setFeature(fs, aFeature, aValue);
return getAddr(fs);
}
}
}
AnnotationFS newAnnotation = createAnnotation(aCas, aBegin, aEnd, aFeature, aValue, type);
return getAddr(newAnnotation);
}
private AnnotationFS createAnnotation(CAS aCas, int aBegin, int aEnd,
AnnotationFeature aFeature, Object aValue, Type aType)
throws AnnotationException
{
AnnotationFS newAnnotation = aCas.createAnnotation(aType, aBegin, aEnd);
setFeature(newAnnotation, aFeature, aValue);
if (getAttachFeatureName() != null) {
Type theType = CasUtil.getType(aCas, getAttachTypeName());
Feature attachFeature = theType.getFeatureByBaseName(getAttachFeatureName());
if (CasUtil.selectCovered(aCas, theType, aBegin, aEnd).isEmpty()) {
throw new AnnotationException("No annotation of type [" + getAttachTypeName()
+ "] to attach to at location [" + aBegin + "-" + aEnd + "].");
}
CasUtil.selectCovered(aCas, theType, aBegin, aEnd).get(0)
.setFeatureValue(attachFeature, newAnnotation);
}
aCas.addFsToIndexes(newAnnotation);
return newAnnotation;
}
/**
* A Helper method to add annotation to a Curation CAS
*/
public AnnotationFS updateCurationCas(CAS aCas, int aBegin, int aEnd,
AnnotationFeature aFeature, Object aValue, AnnotationFS aClickedFs, boolean aIsSlot)
throws AnnotationException
{
Type type = CasUtil.getType(aCas, getAnnotationTypeName());
AnnotationFS newAnnotation = null;
int countAnno = 0;
for (AnnotationFS fs : CasUtil.selectCovered(aCas, type, aBegin, aEnd)) {
countAnno++;
newAnnotation = fs;
if (fs.getBegin() == aBegin && fs.getEnd() == aEnd) {
if (!allowStacking) {
setFeature(fs, aFeature, aValue);
return fs;
}
// if stacking, get other existing feature values before updating with the new
// feature
StringBuilder clickedFtValues = new StringBuilder();
StringBuilder curationFtValues = new StringBuilder();
for (Feature feat : type.getFeatures()) {
switch (feat.getRange().getName()) {
case CAS.TYPE_NAME_STRING:
case CAS.TYPE_NAME_BOOLEAN:
case CAS.TYPE_NAME_FLOAT:
case CAS.TYPE_NAME_INTEGER:
clickedFtValues.append(aClickedFs.getFeatureValueAsString(feat));
curationFtValues.append(fs.getFeatureValueAsString(feat));
default:
continue;
}
}
if (clickedFtValues.toString().equals(curationFtValues.toString())) {
return fs;
}
}
}
if (!aIsSlot) {
newAnnotation = createAnnotation(aCas, aBegin, aEnd, aFeature, aValue, type);
}
if (aIsSlot && countAnno > 1) {
throw new AnnotationException(
"There are different stacking annotation on curation panel, cannot copy the slot feature");
}
return newAnnotation;
}
@Override
public void delete(JCas aJCas, VID aVid)
{
FeatureStructure fs = selectByAddr(aJCas, FeatureStructure.class, aVid.getId());
aJCas.removeFsFromIndexes(fs);
// delete associated attachFeature
if (getAttachTypeName() == null) {
return;
}
Type theType = CasUtil.getType(aJCas.getCas(), getAttachTypeName());
Feature attachFeature = theType.getFeatureByBaseName(getAttachFeatureName());
if (attachFeature == null) {
return;
}
CasUtil.selectCovered(aJCas.getCas(), theType, ((AnnotationFS) fs).getBegin(),
((AnnotationFS) fs).getEnd()).get(0).setFeatureValue(attachFeature, null);
}
@Override
public void delete(JCas aJCas, AnnotationFeature aFeature, int aBegin, int aEnd, Object aValue)
{
Type type = CasUtil.getType(aJCas.getCas(), getAnnotationTypeName());
for (AnnotationFS fs : CasUtil.selectCovered(aJCas.getCas(), type, aBegin, aEnd)) {
if (fs.getBegin() == aBegin && fs.getEnd() == aEnd) {
if (ObjectUtils.equals(getFeature(fs, aFeature), aValue)) {
delete(aJCas, new VID(getAddr(fs)));
}
}
}
}
@Override
public long getTypeId()
{
return layer.getId();
}
@Override
public Type getAnnotationType(CAS cas)
{
return CasUtil.getType(cas, getAnnotationTypeName());
}
/**
* The UIMA type name.
*/
@Override
public String getAnnotationTypeName()
{
return layer.getName();
}
public void setDeletable(boolean aDeletable)
{
this.deletable = aDeletable;
}
@Override
public boolean isDeletable()
{
return deletable;
}
@Override
public String getAttachFeatureName()
{
return layer.getAttachFeature() == null ? null : layer.getAttachFeature().getName();
}
@Override
public List<String> getAnnotation(Sentence aSentence, AnnotationFeature aFeature)
{
CAS cas = aSentence.getCAS();
Type type = getType(cas, getAnnotationTypeName());
List<String> annotations = new ArrayList<String>();
for (Token token : selectCovered(Token.class, aSentence)) {
List<AnnotationFS> tokenLevelAnnotations = selectCovered(type, token);
if (tokenLevelAnnotations.size() > 0) {
AnnotationFS anno = tokenLevelAnnotations.get(0);
Feature labelFeature = anno.getType().getFeatureByBaseName(aFeature.getName());
annotations.add(anno.getFeatureValueAsString(labelFeature));
}
else {
annotations.add(NILL);
}
}
return annotations;
}
public Map<Integer, String> getMultipleAnnotation(Sentence sentence, AnnotationFeature aFeature)
throws CASException
{
Map<Integer, String> multAnno = new HashMap<Integer, String>();
Type type = getType(sentence.getCAS(), getAnnotationTypeName());
for (AnnotationFS fs : selectCovered(type, sentence)) {
boolean isBegin = true;
Feature labelFeature = fs.getType().getFeatureByBaseName(aFeature.getName());
for (Token token : selectCovered(Token.class, fs)) {
if (multAnno.get(getAddr(token)) == null) {
if (isBegin) {
multAnno.put(getAddr(token),
"B-" + fs.getFeatureValueAsString(labelFeature));
isBegin = false;
}
else {
multAnno.put(getAddr(token),
"I-" + fs.getFeatureValueAsString(labelFeature));
}
}
}
}
return multAnno;
}
/**
* A field that takes the name of the annotation to attach to, e.g.
* "de.tudarmstadt...type.Token" (Token.class.getName())
*/
@Override
public String getAttachTypeName()
{
return layer.getAttachType() == null ? null : layer.getAttachType().getName();
}
@Override
public void updateFeature(JCas aJcas, AnnotationFeature aFeature, int aAddress, Object aValue)
{
FeatureStructure fs = selectByAddr(aJcas, FeatureStructure.class, aAddress);
setFeature(fs, aFeature, aValue);
}
@Override
public AnnotationLayer getLayer()
{
return layer;
}
@Override
public Collection<AnnotationFeature> listFeatures()
{
return features.values();
}
}