//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.structural;

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Stream;

import org.apache.uima.UIMAException;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;

import uk.gov.dstl.baleen.annotators.relations.helpers.AbstractRelationshipAnnotator;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.core.utils.ConfigUtils;
import uk.gov.dstl.baleen.exceptions.BaleenException;
import uk.gov.dstl.baleen.types.semantic.Entity;
import uk.gov.dstl.baleen.types.semantic.Relation;
import uk.gov.dstl.baleen.types.structure.TableCell;
import uk.gov.dstl.baleen.uima.utils.TypeSystemSingleton;
import uk.gov.dstl.baleen.uima.utils.TypeUtils;

/**
 * Extract relations from {@link uk.gov.dstl.baleen.types.structure.Table}s.
 * <p>
 * This annotator looks for tables with columns matching the provided source and
 * target headings. The rows of those tables are processed to add a relation per
 * row of the provided type (and subType) between the entity in the source
 * column and the entity in the target column. If the entities do not exist
 * already they are created with the provided source type and target type.
 *
 * @baleen.javadoc
 */
public class TableRelation extends AbstractRelationshipAnnotator {

  /**
   * Is the regular expression case sensitive?
   *
   * @baleen.config false
   */
  public static final String PARAM_CASE_SENSITIVE = "caseSensitive";

  @ConfigurationParameter(name = PARAM_CASE_SENSITIVE, defaultValue = "false")
  private boolean caseSensitive = false;

  /**
   * The regular expression to search for the source column
   *
   * @baleen.config
   */
  public static final String SOURCE_PATTERN = "sourcePattern";

  @ConfigurationParameter(name = SOURCE_PATTERN, mandatory = true)
  private String sourcePattern;

  /**
   * The regular expression to search for the target column
   *
   * @baleen.config
   */
  public static final String TARGET_PATTERN = "targetPattern";

  @ConfigurationParameter(name = TARGET_PATTERN, mandatory = true)
  private String targetPattern;

  /**
   * The source entity type to use
   *
   * @baleen.config uk.gov.dstl.baleen.types.semantic.Entity
   */
  public static final String SOURCE_TYPE = "sourceType";

  @ConfigurationParameter(name = SOURCE_TYPE, defaultValue = "uk.gov.dstl.baleen.types.semantic.Entity")
  private String sourceType;

  /**
   * The target entity type to use
   *
   * @baleen.config uk.gov.dstl.baleen.types.semantic.Entity
   */
  public static final String TARGET_TYPE = "targetType";

  @ConfigurationParameter(name = TARGET_TYPE, defaultValue = "uk.gov.dstl.baleen.types.semantic.Entity")
  private String targetType;

  /**
   * The relation type to use
   *
   * @baleen.config
   */
  public static final String PARAM_TYPE = "type";

  @ConfigurationParameter(name = PARAM_TYPE, mandatory = true)
  private String type;

  /**
   * The relation subType to use
   *
   * @baleen.config
   */
  public static final String PARAM_SUB_TYPE = "subType";

  @ConfigurationParameter(name = PARAM_SUB_TYPE, defaultValue = "")
  private String subType;

  /**
   * The confidence to assign to the relation
   *
   * @baleen.config 1.0
   */
  public static final String PARAM_CONFIDENCE = "confidence";

  @ConfigurationParameter(name = PARAM_CONFIDENCE, defaultValue = "1.0")
  private String confidenceString;

  // Parse the confidence config parameter into this variable to avoid issues
  // with parameter types
  private Float confidence;

  /** Constructor (taking a {@link JCas}) of the entity class named by {@link #SOURCE_TYPE}. */
  private Constructor<? extends Entity> sourceConstructor = null;

  /** Constructor (taking a {@link JCas}) of the entity class named by {@link #TARGET_TYPE}. */
  private Constructor<? extends Entity> targetConstructor = null;

  /** Compiled form of the source column pattern. */
  private Pattern source;

  /** Compiled form of the target column pattern. */
  private Pattern target;

  /**
   * Parses the confidence, compiles the two column patterns and resolves the
   * JCas-taking constructors of the configured source and target entity types.
   *
   * @param aContext
   *            the UIMA context (not read directly by this method)
   * @throws ResourceInitializationException
   *             if either regular expression is invalid, an entity type cannot be
   *             resolved, or the resolved type has no accessible constructor
   *             taking a {@link JCas}
   */
  @Override
  public void doInitialize(final UimaContext aContext) throws ResourceInitializationException {
    confidence = ConfigUtils.stringToFloat(confidenceString, 1.0f);

    try {
      getMonitor().debug("The source regular expression is \"{}\"", sourcePattern);
      getMonitor().debug("The target regular expression is \"{}\"", targetPattern);

      // Apply the case-insensitive flag to a local value rather than to the
      // configuration fields, so initialising twice does not stack prefixes.
      final String flagPrefix = caseSensitive ? "" : "(?i)";
      source = Pattern.compile(flagPrefix + sourcePattern);
      target = Pattern.compile(flagPrefix + targetPattern);

      // One throw-away JCas is enough to resolve both entity class names.
      final JCas typeLookupJCas =
          JCasFactory.createJCas(TypeSystemSingleton.getTypeSystemDescriptionInstance());
      sourceConstructor = TypeUtils.getEntityClass(sourceType, typeLookupJCas).getConstructor(JCas.class);
      targetConstructor = TypeUtils.getEntityClass(targetType, typeLookupJCas).getConstructor(JCas.class);
    } catch (UIMAException | BaleenException | NoSuchMethodException | SecurityException
        | PatternSyntaxException e) {
      // PatternSyntaxException is unchecked; wrap it so a bad regex is reported
      // as a configuration failure like every other initialisation problem.
      throw new ResourceInitializationException(e);
    }
  }

  /**
   * Creates one relation per filtered table row and adds them to the index.
   * <p>
   * Rows are obtained from {@code Tables} filtered by the source column pattern
   * and then the target column pattern; the first cell of each row is treated as
   * the source and the second as the target. The relation spans from the
   * earliest begin to the latest end of the two cells.
   *
   * @param jCas
   *            the document being processed
   * @throws AnalysisEngineProcessException
   *             declared by the overridden method; not thrown directly here
   */
  @Override
  protected void extract(JCas jCas) throws AnalysisEngineProcessException {
    Stream<List<TableCell>> rows = new Tables(jCas).withColumn(source).withColumn(target).getFilteredRows();

    addRelationsToIndex(rows.map(row -> {
      TableCell sourceCell = row.get(0);
      TableCell targetCell = row.get(1);

      Entity sourceEntity = getEntity(jCas, sourceCell, sourceConstructor);
      Entity targetEntity = getEntity(jCas, targetCell, targetConstructor);

      int begin = Math.min(sourceCell.getBegin(), targetCell.getBegin());
      int end = Math.max(sourceCell.getEnd(), targetCell.getEnd());

      // The configured relation type is passed twice, once in the type position
      // and once in the argument following subType.
      return createRelation(jCas, sourceEntity, targetEntity, begin, end, type, subType, type, confidence);
    }));
  }

  /**
   * Returns the entity for a table cell, creating one if none is present.
   * <p>
   * If the cell already covers at least one annotation of the constructor's
   * declaring class, the first such annotation is returned. Otherwise a new
   * entity is instantiated spanning the whole cell, with the cell's covered text
   * as its value, and is added to the JCas index.
   *
   * @param jCas
   *            the document being processed
   * @param cell
   *            the table cell to look in
   * @param type
   *            constructor of the entity class to look for or create
   * @return the existing or newly created entity
   * @throws RuntimeException
   *             if the entity cannot be instantiated reflectively
   */
  private Entity getEntity(JCas jCas, TableCell cell, Constructor<? extends Entity> type) {
    List<? extends Entity> covered = JCasUtil.selectCovered(type.getDeclaringClass(), cell);
    if (!covered.isEmpty()) {
      return covered.get(0);
    }

    try {
      Entity entity = type.newInstance(jCas);
      entity.setBegin(cell.getBegin());
      entity.setEnd(cell.getEnd());
      entity.setValue(cell.getCoveredText());
      // NOTE(review): the configured relation subType is also applied to newly
      // created entities - confirm this is intended.
      if (!Strings.isNullOrEmpty(subType)) {
        entity.setSubType(subType);
      }
      addToJCasIndex(entity);
      return entity;
    } catch (InstantiationException | IllegalAccessException | IllegalArgumentException
        | InvocationTargetException e) {
      throw new RuntimeException("Can not create entity type " + type.getName(), e);
    }
  }

  /**
   * Describes the annotation types this annotator works with.
   * <p>
   * The first set holds the source and target entity classes; the second set
   * holds those classes plus {@link Relation} (presumably inputs and outputs
   * respectively - confirm against {@link AnalysisEngineAction}).
   *
   * @return the action descriptor built from the resolved entity classes
   */
  @Override
  public AnalysisEngineAction getAction() {
    return new AnalysisEngineAction(
        ImmutableSet.of(sourceConstructor.getDeclaringClass(), targetConstructor.getDeclaringClass()),
        ImmutableSet.of(sourceConstructor.getDeclaringClass(), targetConstructor.getDeclaringClass(),
            Relation.class));
  }
}