//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.uima;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.stream.Collectors;
import org.apache.uima.UIMAException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.jcas.tcas.DocumentAnnotation;
import uk.gov.dstl.baleen.core.history.BaleenHistory;
import uk.gov.dstl.baleen.core.history.DocumentHistory;
import uk.gov.dstl.baleen.core.history.HistoryEvent;
import uk.gov.dstl.baleen.core.history.HistoryEvents;
import uk.gov.dstl.baleen.core.history.Recordable;
import uk.gov.dstl.baleen.core.history.noop.NoopBaleenHistory;
import uk.gov.dstl.baleen.types.Base;
import uk.gov.dstl.baleen.types.semantic.Entity;
import uk.gov.dstl.baleen.types.semantic.ReferenceTarget;
import uk.gov.dstl.baleen.types.semantic.Relation;
import uk.gov.dstl.baleen.uima.utils.UimaUtils;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
/** A support class for Uima within Baleen.
*
* This is used to keep common functions for manipulating Baleen entities and annotations together to avoid
* code duplication. It provides helpers to accessing and creating entities, and standardises history.
*
* Users of Baleen do not need to create this object, as it is typically access through BaleenAnnotator,
* BaleenConsumer, etc and will be preconfigured.
*/
public class UimaSupport {
private final UimaMonitor monitor;
private final String referrer;
private final boolean mergeDistinctEntities;
private final BaleenHistory history;
private final String pipelineName;
/** New instance.
* @param pipelineName the name of the pipeline
* @param clazz the clazz to owning this support
* @param history the history to store to (if null will use the Noop history)
* @param monitor the monitor instance, used for logging.
* @param mergeDistinctEntities when merging should we merge entities when they have distinct reference targets
*/
public UimaSupport(String pipelineName, Class<?> clazz, BaleenHistory history,
UimaMonitor monitor, boolean mergeDistinctEntities) {
this.pipelineName = pipelineName;
this.history = history != null ? history : NoopBaleenHistory.getInstance();
this.referrer = UimaUtils.makePipelineSpecificName(pipelineName, clazz);
this.monitor = monitor;
this.mergeDistinctEntities = mergeDistinctEntities;
}
/** Get the name of the pipeline to which this belongs.
* @return pipeline name
*/
public String getPipelineName() {
return pipelineName;
}
/**
* Add an annotation to the JCas index, notifying UimaMonitor of the fact we
* have done so
*
* @param annot
* Annotation(s) to add
*/
public void add(Annotation... annotations) {
add(Arrays.asList(annotations));
}
/**
* Add an annotation to the JCas index, notifying UimaMonitor of the fact we
* have done so
*
* @param annot
* Annotation(s) to add
*/
public void add(Collection<? extends Annotation> annotations) {
for(Annotation annot : annotations) {
annot.addToIndexes();
monitor.entityAdded(annot.getType().getName());
if(annot instanceof Entity) {
Entity entity = (Entity)annot;
// Add in a value if it doesn't have one
if(Strings.isNullOrEmpty(entity.getValue())) {
entity.setValue( annot.getCoveredText() );
}
addToHistory(annot.getCAS(), HistoryEvents.createAdded((Recordable)annot, referrer));
}
}
}
/**
* Remove an annotation to the JCas index, notifying UimaMonitor of the fact
* we have done so.
*
* Relations that refer to the given annotation will also be removed.
*
* @param annot
* Annotation(s) to remove
*/
public void remove(Collection<? extends Annotation> annotations) {
for(Annotation annot : annotations) {
if(annot instanceof Recordable) {
try {
addToHistory(annot.getCAS().getJCas(), HistoryEvents.createAdded((Recordable)annot, referrer));
} catch (CASException e) {
monitor.error("Unable to add to history on remove",e);
}
}
if(annot instanceof Entity){
for(Relation r : getRelations((Entity)annot)){
monitor.entityRemoved(r.getType().getName());
r.removeFromIndexes();
}
}
monitor.entityRemoved(annot.getType().getName());
annot.removeFromIndexes();
}
}
/**
* Remove an annotation to the JCas index, notifying UimaMonitor of the fact
* we have done so.
*
* Relations that refer to the given annotation will also be removed.
*
* @param annot
* Annotation(s) to remove
*/
public void remove(Annotation... annotations) {
remove(Arrays.asList(annotations));
}
/**
* Add a new annotation, which is merged from the old annotations, removing the old annotations.
*
* @param newAnnotation
* The annotation which is to be added to the document as the merged result of the old annotations
* @param annotations
* Annotation(s) which have been merged and should be removed
*/
public void mergeWithNew(Annotation newAnnotation, Annotation... annotations) {
mergeWithNew(newAnnotation, Arrays.asList(annotations));
}
/**
* Add a new annotation, which is merged from the old annotations, removing the old annotations.
*
* @param newAnnotation
* The annotation which is to be added to the document as the merged result of the old annotations
* @param annotations
* Annotation(s) which have been merged and should be removed
*/
public void mergeWithNew(Annotation newAnnotation, Collection<? extends Annotation> annotations) {
add(newAnnotation);
mergeWithExisting(newAnnotation, annotations);
}
/**
* Merge an existing annotation with old annotations, removing the old annotations.
*
* @param existingAnnotation
* The annotation which exists and is to be left in the document (merged)
* @param annotations
* Annotation(s) which have been merged wiht existingAnnotation and then removed
*/
public void mergeWithExisting(Annotation existingAnnotation, Annotation... annotations) {
mergeWithExisting(existingAnnotation, Arrays.asList(annotations));
}
/**
* Add a new annotation, which is merged from the old annotations, removing the old annotations.
*
* @param existingAnnotation
* The annotation which is to be left in the document (merged)
* @param annotations
* Annotation(s) which will be merged with existingAnnotation and then removed
*/
public void mergeWithExisting(Annotation existingAnnotation, Collection<? extends Annotation> annotations) {
if(annotations == null || annotations.isEmpty()) {
return;
}
if(!(existingAnnotation instanceof Entity)) {
// If the target is just an annotation then remove everything independently of coreference targets
// since annotations does not have this configuration.
mergeWithExistingNoCoref(existingAnnotation, annotations);
return;
}
// If we've got here, then existingAnnotation is an entity and so we need to process it
Entity existingEntity = (Entity) existingAnnotation;
for(Annotation a : annotations) {
if(a instanceof Entity) {
// If an entity we check if they point to the same reference target
Entity entity = (Entity)a;
mergeEntities(entity, existingEntity);
} else {
// If an annotation just remove
mergeWithExistingNoCoref(existingAnnotation, Lists.newArrayList(a));
}
}
}
/**
* Merge entity onto targetEntity (assuming they have the same ReferentTarget), updating relationships as required.
*
* @return True if merge was successful, false otherwise
*/
private boolean mergeEntities(Entity entity, Entity targetEntity){
ReferenceTarget targetRef = targetEntity.getReferent();
ReferenceTarget entityRef = entity.getReferent();
if(mergeDistinctEntities || isSameTarget(targetRef, entityRef)) {
addMergeToHistory(targetEntity, entity);
//Update relationship pointers
for(Relation r : getRelations(entity)){
if(r.getSource() == entity){
r.setSource(targetEntity);
}
if(r.getTarget() == entity){
r.setTarget(targetEntity);
}
}
remove(entity);
return true;
} else {
monitor.info("Not merging objects {} and {} as they have different referents", targetEntity.getInternalId(), entity.getInternalId());
return false;
}
}
/**
* Merge annotations whilst ignoring coreferences.
* It is recommended that mergeWithExisting(...) is used instead as that will pass off to this method when appropriate
*
* @param existingAnnotation
* The annotation which is to be left in the document (merged)
* @param annotations
* Annotation(s) which will be merged with existingAnnotation and then removed
*/
public void mergeWithExistingNoCoref(Annotation existingAnnotation, Collection<? extends Annotation> annotations){
for(Annotation a : annotations) {
addMergeToHistory(existingAnnotation, a);
}
remove(annotations);
}
private boolean isSameTarget(ReferenceTarget rt1, ReferenceTarget rt2){
return rt1 == rt2
|| rt1 != null && rt1.equals(rt2);
}
private void addMergeToHistory(Annotation keep, Annotation removed) {
if(keep instanceof Recordable && removed instanceof Base) {
Recordable r = (Recordable)keep;
Base b = (Base)removed;
try {
addToHistory(keep.getCAS().getJCas(), HistoryEvents.createMerged(r, referrer, b.getInternalId()));
} catch (CASException e) {
monitor.error("Unable to add merge to history", e);
}
}
}
/** Adds a event to the history for this CAS document.
* @param cas the target document for the event
* @param event the event to add
*/
public void addToHistory(CAS cas, HistoryEvent event) {
try {
getDocumentHistory(cas.getJCas()).add(event);
} catch (CASException e) {
monitor.error("Unable to add to history on add",e);
}
}
/** Adds a event to the history for this jcas document.
* @param jCas the target document for the event
* @param event the event to add
*/
public void addToHistory(JCas jCas, HistoryEvent event) {
getDocumentHistory(jCas).add(event);
}
/** Get (or create) the history associated with the document.
* @param jCas the target document
* @return the history associated with the document
*/
public DocumentHistory getDocumentHistory(JCas jCas) {
String documentId = pipelineName + ":" + getDocumentAnnotation(jCas).getHash();
return history.getHistory(documentId);
}
/**
* Return the document annotation.
*
* @param jCas
* @return the document annotation
*/
public DocumentAnnotation getDocumentAnnotation(JCas jCas){
return (DocumentAnnotation) jCas.getDocumentAnnotationFs();
}
/**
* Get relations that have a given entity as either the source or the target
*
* @param e The given entity
* @return Collection of relations that refer to the given Entity
*/
public Collection<Relation> getRelations(Entity e){
try{
JCas jCas = e.getCAS().getJCas();
Collection<Relation> relations = JCasUtil.select(jCas, Relation.class);
return relations.stream().filter(r -> r.getSource() == e || r.getTarget() == e).collect(Collectors.toList());
}catch(UIMAException ue){
monitor.warn("Unable to get relations from entity", ue);
return Collections.emptyList();
}
}
}