//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.structural;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import com.google.common.collect.ImmutableSet;
import uk.gov.dstl.baleen.annotators.relations.helpers.AbstractRelationshipAnnotator;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.core.utils.ConfigUtils;
import uk.gov.dstl.baleen.types.semantic.Entity;
import uk.gov.dstl.baleen.types.semantic.Relation;
import uk.gov.dstl.baleen.types.structure.Structure;
import uk.gov.dstl.baleen.uima.utils.AnnotationHierarchyBuilder;
import uk.gov.dstl.baleen.uima.utils.StructureUtil;
import uk.gov.dstl.baleen.uima.utils.select.Nodes;
/**
* Extract relation using the provided structural queries.
*
* <p>
* The structural query supplied is run over the whole document, to identify
* structural features containing the required annotations. Then the source and
* target queries are applied to find the source and target within the
* structural annotation. A relation is then created for matched elements with
* the type information supplied.
* <p>
* A confidence to assign can be supplied.
*
* @baleen.javadoc
*/
public class StructuralRelation extends AbstractRelationshipAnnotator {

  /**
   * A list of structural types which will be considered during record path
   * analysis.
   * <p>
   * Leave blank for all types.
   *
   * @baleen.config Paragraph,TableCell,ListItem,Aside, ...
   */
  public static final String PARAM_TYPE_NAMES = "types";

  /** The type names. */
  @ConfigurationParameter(name = PARAM_TYPE_NAMES, mandatory = false)
  private String[] typeNames;

  /**
   * The relation type to use
   *
   * @baleen.config
   */
  public static final String PARAM_TYPE = "type";

  /** The relation type assigned to every relation created by this annotator. */
  @ConfigurationParameter(name = PARAM_TYPE, mandatory = true)
  private String type;

  /**
   * The relation subType to use
   *
   * @baleen.config
   */
  public static final String PARAM_SUB_TYPE = "subType";

  /** The relation sub type assigned to every relation created by this annotator. */
  @ConfigurationParameter(name = PARAM_SUB_TYPE, defaultValue = "")
  private String subType;

  /**
   * The confidence to assign to the relation
   *
   * @baleen.config 1.0
   */
  public static final String PARAM_CONFIDENCE = "confidence";

  /** The raw confidence parameter; converted into {@link #confidence} in doInitialize. */
  @ConfigurationParameter(name = PARAM_CONFIDENCE, defaultValue = "1.0")
  private String confidenceString;

  /**
   * The query to isolate the related entities
   *
   * @baleen.config
   */
  public static final String PARAM_QUERY = "query";

  /** The structural query run over the annotation hierarchy of the whole document. */
  @ConfigurationParameter(name = PARAM_QUERY, mandatory = true)
  private String query;

  /**
   * The source entity sub query used to find the source in the result of the
   * query
   *
   * @baleen.config
   */
  public static final String PARAM_SOURCE_QUERY = "sourceQuery";

  /** The sub query applied to each match of {@link #query} to locate the source entity. */
  @ConfigurationParameter(name = PARAM_SOURCE_QUERY, mandatory = true)
  private String sourceQuery;

  /**
   * The target entity sub query used to find the target in the result of the
   * query
   *
   * @baleen.config
   */
  public static final String PARAM_TARGET_QUERY = "targetQuery";

  /** The sub query applied to each match of {@link #query} to locate the target entity. */
  @ConfigurationParameter(name = PARAM_TARGET_QUERY, mandatory = true)
  private String targetQuery;

  // Parse the confidence config parameter into this variable to avoid issues
  // with parameter types
  private Float confidence;

  /**
   * The structural classes.
   * <p>
   * NOTE(review): this field is never assigned within this class; it is kept
   * because it is protected and may be referenced by subclasses.
   */
  protected Set<Class<? extends Structure>> structuralClasses;

  /** The annotation classes used to build the annotation hierarchy that is queried. */
  protected Set<Class<? extends Annotation>> annotationClasses;

  /**
   * Initialises the annotator: collects the structure classes for the configured
   * type names together with the annotation classes for {@link Entity}, and
   * converts the confidence parameter to a float.
   *
   * @param aContext the UIMA context, passed on to the superclass
   * @throws ResourceInitializationException if initialisation fails
   */
  @Override
  public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
    super.doInitialize(aContext);

    annotationClasses =
        ImmutableSet.<Class<? extends Annotation>>builder()
            .addAll(StructureUtil.getStructureClasses(typeNames))
            .addAll(StructureUtil.getAnnotationClasses(Entity.class))
            .build();

    // The second argument is presumably the fallback used when the string
    // cannot be parsed - confirm against ConfigUtils.
    confidence = ConfigUtils.stringToFloat(confidenceString, 1.0f);
  }

  /**
   * Runs the structural query over the document's annotation hierarchy and, for
   * each match, creates a relation between the first result of the source sub
   * query and the first result of the target sub query. The relation spans the
   * matched annotation.
   * <p>
   * A match is skipped when either sub query returns nothing, or when the first
   * result of either sub query is not an {@link Entity}.
   *
   * @param jCas the document to process
   * @throws AnalysisEngineProcessException if processing fails
   */
  @Override
  protected void extract(JCas jCas) throws AnalysisEngineProcessException {
    Nodes<Annotation> matches =
        AnnotationHierarchyBuilder.build(jCas, annotationClasses).select(query);

    addRelationsToIndex(
        matches.stream()
            .flatMap(
                match -> {
                  Entity sourceEntity = firstEntity(match.select(sourceQuery));
                  Entity targetEntity = firstEntity(match.select(targetQuery));
                  if (sourceEntity == null || targetEntity == null) {
                    return Stream.empty();
                  }

                  int begin = match.getItem().getBegin();
                  int end = match.getItem().getEnd();

                  // The relation type is deliberately passed twice, exactly as
                  // before. NOTE(review): confirm the meaning of the eighth
                  // parameter of createRelation.
                  return Stream.of(
                      createRelation(
                          jCas,
                          sourceEntity,
                          targetEntity,
                          begin,
                          end,
                          type,
                          subType,
                          type,
                          confidence));
                }));
  }

  /**
   * Returns the item of the first node if it is an {@link Entity}.
   * <p>
   * The hierarchy is built from structural as well as entity classes, so a sub
   * query may select a non-entity annotation; an unchecked cast would then
   * throw a ClassCastException and abort extraction.
   *
   * @param nodes the result of a sub query
   * @return the first node's item as an entity, or null if there are no nodes
   *         or the first item is not an entity
   */
  private static Entity firstEntity(Nodes<Annotation> nodes) {
    if (nodes.isEmpty()) {
      return null;
    }
    Annotation item = nodes.first().getItem();
    return item instanceof Entity ? (Entity) item : null;
  }

  /**
   * Describes this annotator for pipeline ordering, listing {@link Entity} and
   * {@link Relation} (presumably as its input and output types respectively -
   * confirm against AnalysisEngineAction).
   *
   * @return the action description
   */
  @Override
  public AnalysisEngineAction getAction() {
    return new AnalysisEngineAction(ImmutableSet.of(Entity.class), ImmutableSet.of(Relation.class));
  }
}