//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.relations.helpers; import java.util.ArrayList; import java.util.Collection; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import java.util.Objects; import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.jcas.JCas; import uk.gov.dstl.baleen.annotators.patterns.data.RelationWrapper; import uk.gov.dstl.baleen.types.semantic.Entity; import uk.gov.dstl.baleen.types.semantic.Relation; import uk.gov.dstl.baleen.uima.BaleenAnnotator; import uk.gov.dstl.baleen.uima.utils.ComparableEntitySpanUtils; /** * A base class for relationship extractors which use interaction words as a * trigger. * * Implementations should override {@link #extract(JCas) extract}, and * potentially {@link #preExtract(JCas) preExtract} and * {@link #postExtract(JCas) postExtract}, which both allow for creation and * clean up of objects related to extraction. * */ public abstract class AbstractRelationshipAnnotator extends BaleenAnnotator { @Override protected final void doProcess(final JCas jCas) throws AnalysisEngineProcessException { try { preExtract(jCas); extract(jCas); } finally { postExtract(jCas); } } /** * Extract relations from the jCas. * * It is the overridders responsibility to add these to the jCas Index * (addRelationsToIndex) * * @param jCas * the j cas * @throws AnalysisEngineProcessException */ protected abstract void extract(JCas jCas) throws AnalysisEngineProcessException; /** * Called before extract(). * * @param jCas * the jcas */ protected void preExtract(final JCas jCas) { // Do nothing } /** * Called after extract (including on exception). * * @param jCas * the jcas */ protected void postExtract(final JCas jCas) { // Do nothing } /** * Adds a stream of relations to index. * * @param relations * the relations */ protected void addRelationsToIndex(final Stream<Relation> relations) { if (relations != null) { relations.filter(Objects::nonNull) // Only add events aren't in the same // Prevents overlapping spans since that makes no sense .filter(r -> r.getSource().getInternalId() != r.getTarget().getInternalId() && !ComparableEntitySpanUtils.overlaps(r.getSource(), r.getTarget())) // Discard anything which has no relationship type // TODO: Is this sensible? // These are direct connection between A and B for the // dependency graph (you can't be more connected than that) // but then you have no relationship text to work with. .filter(r -> r.getRelationshipType() != null || !StringUtils.isBlank(r.getRelationshipType())) .forEach(this::addToJCasIndex); } } /** * Creates the relation. * * @param jCas * the jcas * @param source * the source the source entity * @param target * the target the target entity * @param begin * the begin of the relation * @param end * the end of the relation * @param type * the type of the relation * @param subType * the sub type of the relation * @param value * the value of the relation * @param confidence * the confidence of the relation * @return the relation */ protected Relation createRelation(final JCas jCas, final Entity source, final Entity target, int begin, int end, String type, String subType, String value, Float confidence) { final Relation r = new Relation(jCas); r.setBegin(begin); r.setEnd(end); r.setRelationshipType(type); r.setRelationSubType(subType); r.setSource(source); r.setTarget(target); r.setValue(value); r.setConfidence(confidence); return r; } /** * Creates the relations of the same type between from all the entities on * the source list to all the entities on the target list. * * @param jCas * the j cas * @param sources * the sources * @param targets * the targets * @param begin * the begin of the relation * @param end * the end of the relation * @param type * the type of the relation * @param subType * the sub type of the relation * @param value * the value of the relation * @param confidence * the confidence of the relation * @return the stream of relations */ protected Stream<Relation> createPairwiseRelations(final JCas jCas, final List<Entity> sources, final List<Entity> targets, int begin, int end, String type, String subType, String value, Float confidence) { return sources.stream().flatMap(l -> targets.stream() .map(r -> createRelation(jCas, l, r, begin, end, type, subType, value, confidence))); } /** * Creates the relations between all the entities provided (but not between * an entity and itself). * * @param jCas * the j cas * @param collection * the collection of entities to related * @param begin * the begin of the relation * @param end * the end of the relation * @param type * the type of the relation * @param subType * the sub type of the relation * @param value * the value of the relation * @param confidence * the confidence of the relation * @return the stream of relations */ protected Stream<Relation> createMeshedRelations(final JCas jCas, final Collection<Entity> collection, int begin, int end, String type, String subType, String value, Float confidence) { final List<Relation> relations = new LinkedList<>(); List<Entity> entities; if (collection instanceof List) { entities = (List<Entity>) collection; } else { entities = new ArrayList<>(collection); } final ListIterator<Entity> outer = entities.listIterator(); while (outer.hasNext()) { final Entity source = outer.next(); final ListIterator<Entity> inner = entities.listIterator(outer.nextIndex()); while (inner.hasNext()) { final Entity target = inner.next(); relations.add(createRelation(jCas, source, target, begin, end, type, subType, value, confidence)); } } return relations.stream(); } /** * Make the stream distinct (no relations of the same type, between the same * entities). * * @param stream * the stream * @return the stream */ protected Stream<Relation> distinct(final Stream<Relation> stream) { return stream.filter(Objects::nonNull).map(RelationWrapper::new).distinct().map(RelationWrapper::getRelation); } }