//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.cleaners;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.types.semantic.Entity;
import uk.gov.dstl.baleen.types.semantic.ReferenceTarget;
import uk.gov.dstl.baleen.uima.BaleenAnnotator;
import uk.gov.dstl.baleen.uima.utils.ComparableEntitySpanUtils;
import uk.gov.dstl.baleen.uima.utils.ReferentUtils;
/**
 * Convert non-entity annotations into entity annotations, in the case that an annotation has a
 * referent target that is shared with an entity.
 * <p>
 * For each reference target that has at least one entity, a single "best" entity is chosen (see
 * {@link #getBestEntity(Collection)}). Every other annotation sharing that reference target, and
 * which is not itself already an entity, gets a copy of the best entity created over its span.
 * <p>
 * This is useful for consumers that work specifically with entities but not with other types.
 *
 * @baleen.javadoc
 */
public class ReferentToEntity extends BaleenAnnotator {

  @Override
  protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
    final Multimap<ReferenceTarget, Entity> referentMap =
        ReferentUtils.createReferentMap(jCas, Entity.class);

    // Snapshot of the entities already present, so that annotations which are
    // already entities are skipped below rather than duplicated.
    final Collection<Entity> entities = new HashSet<>(JCasUtil.select(jCas, Entity.class));

    // One representative entity per reference target, chosen by getBestEntity.
    final Map<ReferenceTarget, Entity> targets =
        ReferentUtils.filterToSingle(referentMap, ReferentToEntity::getBestEntity);

    // Now look through the non-entities and create entities in their place.
    final List<Entity> toAdd = ReferentUtils.streamReferent(jCas, targets)
        .map(a -> {
          final ReferenceTarget referent = a.getReferent();
          final Entity entity = targets.get(referent);
          if (entity != null && !entities.contains(a)) {
            return ComparableEntitySpanUtils.copyEntity(jCas, a.getBegin(), a.getEnd(), entity);
          } else {
            return null;
          }
        })
        .filter(Objects::nonNull)
        .collect(Collectors.toList());

    addToJCasIndex(toAdd);
  }

  /**
   * Selects the best entity from a collection, as judged by
   * {@link #isBetterEntity(Entity, Entity)}.
   * <p>
   * When several entities are equally good, the one encountered first in the collection's
   * iteration order is kept.
   *
   * @param list
   *            the candidate entities; must not be empty
   * @return the best entity
   * @throws NoSuchElementException
   *             if {@code list} is empty
   */
  protected static Entity getBestEntity(Collection<Entity> list) {
    return list.stream()
        .reduce((a, b) -> isBetterEntity(a, b) ? b : a)
        .orElseThrow(() -> new NoSuchElementException(
            "Cannot select the best entity from an empty collection"));
  }

  /**
   * Decides whether the challenger should replace the original as the preferred entity.
   * <p>
   * This is a simple heuristic: the entity with the longer text wins. The text compared is the
   * entity's value, or its covered text if the value is null. An entity with neither is treated
   * as having zero-length text.
   *
   * @param original
   *            the currently preferred entity
   * @param challenger
   *            the entity to compare against it
   * @return true if the challenger's text is strictly longer than the original's
   */
  protected static boolean isBetterEntity(Entity original, Entity challenger) {
    // Simple version, just look for the longest string
    // we could look at how complete the attributes are, etc
    return comparisonLength(original) < comparisonLength(challenger);
  }

  /**
   * Returns the length of the text used to rank an entity: its value, falling back to its
   * covered text, or 0 if both are null.
   */
  private static int comparisonLength(Entity entity) {
    String text = entity.getValue();
    if (text == null) {
      text = entity.getCoveredText();
    }
    // Neither a value nor covered text is available, so rank the entity as
    // empty instead of failing with a NullPointerException.
    return text == null ? 0 : text.length();
  }

  @Override
  public AnalysisEngineAction getAction() {
    // Entity is declared in both sets passed to the action (presumably inputs then outputs).
    return new AnalysisEngineAction(ImmutableSet.of(Entity.class), ImmutableSet.of(Entity.class));
  }
}