//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.cleaners;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.bson.Document;
import com.google.common.collect.ImmutableSet;
import com.mongodb.client.MongoCollection;
import uk.gov.dstl.baleen.annotators.patterns.data.RelationConstraint;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.resources.SharedMongoResource;
import uk.gov.dstl.baleen.types.semantic.Relation;
import uk.gov.dstl.baleen.uima.BaleenAnnotator;
/**
* Removes relationships that don't match UIMA type constraints.
* <p>
* Many relationships will only make sense between specific entity types. For example (Person, went
* to, Location) not (DateTime, went to, Location). This filter allows for relational type
* constraints.
* <p>
* Since relationship extractors may have different capabilities (e.g. finding the direction of
* relationships, discovering new unknown relationships) there are several configuration parameters
* which relax the strictness of filtering.
* <p>
* Mongo constraint documents are formed as:
*
* <pre>
* {
* source: 'type of source',
* target: 'type of target',
* type: 'relation type',
* }
* </pre>
*
* See {@link UploadInteractionsToMongo} and {@link MongoInteractionWriter} for information how to
* create this collection.
*
* @baleen.javadoc
*/
public class RelationTypeFilter extends BaleenAnnotator {

  /**
   * Connection to Mongo
   *
   * @baleen.resource uk.gov.dstl.baleen.resources.SharedMongoResource
   */
  public static final String KEY_MONGO = "mongo";

  @ExternalResource(key = KEY_MONGO)
  private SharedMongoResource mongo;

  /**
   * The name of the Mongo collection containing the relation types
   *
   * @baleen.config relationTypes
   */
  public static final String PARAM_COLLECTION = "collection";

  @ConfigurationParameter(name = PARAM_COLLECTION, defaultValue = "relationTypes")
  private String collection;

  /**
   * The name of the field in Mongo that contains the relation type
   *
   * @baleen.config type
   */
  public static final String PARAM_TYPE_FIELD = "typeField";

  @ConfigurationParameter(name = PARAM_TYPE_FIELD, defaultValue = "type")
  private String typeField;

  /**
   * The name of the field in Mongo that contains the relation sub type
   *
   * @baleen.config subType
   */
  public static final String PARAM_SUBTYPE_FIELD = "subTypeField";

  @ConfigurationParameter(name = PARAM_SUBTYPE_FIELD, defaultValue = "subType")
  private String subTypeField;

  /**
   * The name of the field in Mongo that contains the relation source type
   *
   * @baleen.config source
   */
  public static final String PARAM_SOURCE_FIELD = "sourceField";

  @ConfigurationParameter(name = PARAM_SOURCE_FIELD, defaultValue = "source")
  private String sourceField;

  /**
   * The name of the field in Mongo that contains the relation target type
   *
   * @baleen.config target
   */
  public static final String PARAM_TARGET_FIELD = "targetField";

  @ConfigurationParameter(name = PARAM_TARGET_FIELD, defaultValue = "target")
  private String targetField;

  /**
   * The name of the field in Mongo that contains the relation pos
   *
   * @baleen.config pos
   */
  public static final String PARAM_POS_FIELD = "posField";

  @ConfigurationParameter(name = PARAM_POS_FIELD, defaultValue = "pos")
  private String posField;

  /**
   * Determines strictness of filtering.
   *
   * <p>In strict mode a relation is removed when no constraints are known for its relationship
   * type. In non-strict mode such a relation passes the filter. In both modes, if the relationship
   * type has constraints then at least one of them must match the relation, otherwise it is
   * removed.
   *
   * @baleen.config false
   */
  public static final String PARAM_STRICT = "strict";

  @ConfigurationParameter(name = PARAM_STRICT, defaultValue = "false")
  private boolean strict;

  /**
   * Determines if relations can be considered symmetric (source and target swapped)
   *
   * @baleen.config true
   */
  public static final String PARAM_SYMMETRIC = "symmetric";

  @ConfigurationParameter(name = PARAM_SYMMETRIC, defaultValue = "true")
  private boolean symmetric;

  /** Constraints loaded from Mongo, keyed by the lower-cased relation type. */
  private final Map<String, Set<RelationConstraint>> constraints = new HashMap<>();

  /**
   * Loads the relation constraints from the configured Mongo collection.
   *
   * <p>Every document in the collection is converted to a {@link RelationConstraint} using the
   * configured field names. Constraints that report themselves as invalid are skipped; the
   * remainder are grouped by lower-cased relation type.
   *
   * @param aContext the UIMA context, passed on to the superclass
   * @throws ResourceInitializationException if the superclass initialisation fails
   */
  @Override
  public void doInitialize(final UimaContext aContext) throws ResourceInitializationException {
    super.doInitialize(aContext);

    final MongoCollection<Document> dbCollection = mongo.getDB().getCollection(collection);

    for (final Document o : dbCollection.find()) {
      final RelationConstraint constraint =
          new RelationConstraint(
              (String) o.get(typeField),
              (String) o.get(subTypeField),
              (String) o.get(posField),
              (String) o.get(sourceField),
              (String) o.get(targetField));

      if (constraint.isValid()) {
        // The key must be lower-cased for both lookup and insertion (doProcess looks up by
        // lower-cased type); otherwise constraints sharing a mixed-case type overwrite each other.
        constraints
            .computeIfAbsent(constraint.getType().toLowerCase(), k -> new HashSet<>())
            .add(constraint);
      }
    }
  }

  /**
   * Removes from the JCas index every relation that fails the type constraints.
   *
   * <p>A relation whose type is unset, or whose type has no known constraints, is removed only in
   * strict mode. A relation whose type has constraints is removed unless at least one constraint
   * matches it.
   *
   * @param jCas the document being processed
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
    final List<Relation> toRemove = new ArrayList<>();

    for (final Relation relation : JCasUtil.select(jCas, Relation.class)) {
      // A relation without a type cannot have constraints; treat it like an unknown type rather
      // than failing the whole document with a NullPointerException.
      final String type = relation.getRelationshipType();
      final Set<RelationConstraint> rcs =
          type == null ? null : constraints.get(type.toLowerCase());

      final boolean remove;
      if (rcs == null || rcs.isEmpty()) {
        // No constraints for this type: only strict mode removes the relation
        remove = strict;
      } else {
        remove = !checkValid(rcs, relation);
      }

      if (remove) {
        toRemove.add(relation);
      }
    }

    // Collected first and removed afterwards so the index is not modified while iterating it
    removeFromJCasIndex(toRemove);
  }

  /**
   * Check if the relation is valid against the constraints.
   *
   * @param rcs the constraints registered for the relation's type
   * @param relation the relation to check
   * @return true if at least one constraint matches the relation
   */
  private boolean checkValid(final Set<RelationConstraint> rcs, final Relation relation) {
    return rcs.stream().anyMatch(p -> p.matches(relation, symmetric));
  }

  @Override
  public AnalysisEngineAction getAction() {
    return new AnalysisEngineAction(ImmutableSet.of(Relation.class), Collections.emptySet());
  }
}