//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.templates; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.OptionalInt; import org.apache.commons.lang3.StringUtils; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import com.google.common.collect.ImmutableSet; import com.samskivert.mustache.DefaultCollector; import com.samskivert.mustache.Mustache; import com.samskivert.mustache.Mustache.Collector; import com.samskivert.mustache.Mustache.Compiler; import com.samskivert.mustache.Mustache.VariableFetcher; import com.samskivert.mustache.Template; import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction; import uk.gov.dstl.baleen.types.templates.TemplateField; import uk.gov.dstl.baleen.types.templates.TemplateRecord; import uk.gov.dstl.baleen.uima.BaleenAnnotator; /** * Creates a new TemplateField with the given fieldName, based on fields in the * Record of the given recordName, using the supplied template. * <p> * Optionally, a source can be provided to disambiguate records/fields created * from multiple definition configurations. * </p> * <p> * The template is a simple mustache template, where the fields of the given * record are exposed as root level properties in the mustache context ready for * direct binding. * </p> * <p> * Example configuration: * </p> * * <pre> ... annotators: - class templates.TemplateAnnotator: ... - class templates.TempalteFieldJoiningAnnotator: fieldName: fullName record: report template: {{surname}}, {{firstName}} source: peopleReportDefinitions * </pre> * */ public class TemplateFieldJoiningAnnotator extends BaleenAnnotator { /** The Constant PARAM_TEMPLATE. */ public static final String PARAM_TEMPLATE = "template"; /** * The template to use to create the new value. */ @ConfigurationParameter(name = PARAM_TEMPLATE) private String mustacheTemplate; /** The Constant PARAM_RECORD. */ public static final String PARAM_RECORD = "record"; /** * The record to use when matching field names. */ @ConfigurationParameter(name = PARAM_RECORD) private String recordName; /** The Constant PARAM_FIELD_NAME. */ public static final String PARAM_FIELD_NAME = "fieldName"; /** * The field name to create. * * @baleen.config field */ @ConfigurationParameter(name = PARAM_FIELD_NAME, defaultValue = "field") private String fieldName; /** The Constant PARAM_SOURCE. */ public static final String PARAM_SOURCE = "source"; /** * The source type to search for the record. */ @ConfigurationParameter(name = PARAM_SOURCE, mandatory = false) private String source; /** The compiled template. */ private Template compiledTemplate; /** The touched fields. */ private Collection<String> touchedFields; @Override public void doInitialize(UimaContext aContext) throws ResourceInitializationException { super.doInitialize(aContext); touchedFields = gatherReferencedFields(); compiledTemplate = Mustache.compiler().compile(mustacheTemplate); } /** * Gather fields that are referenced in the mustache template. * * @return the collection */ private Collection<String> gatherReferencedFields() { Collection<String> fields = new ArrayList<>(); Collector collector = new DefaultCollector() { @Override public VariableFetcher createFetcher(Object ctx, String name) { fields.add(name); return super.createFetcher(ctx, name); } }; Compiler compiler = Mustache.compiler().defaultValue("").withCollector(collector); Template mockTemplate = compiler.compile(mustacheTemplate); mockTemplate.execute(new HashMap<>()); return fields; } @Override protected void doProcess(JCas jCas) throws AnalysisEngineProcessException { Map<String, String> recordFieldValues = new HashMap<>(); Map<String, TemplateField> recordFields = new HashMap<>(); Collection<TemplateRecord> records = JCasUtil.select(jCas, TemplateRecord.class); for (TemplateRecord record : records) { if (!StringUtils.equals(recordName, record.getName()) || !StringUtils.isEmpty(source) && !source.equalsIgnoreCase(record.getSource())) { continue; } List<TemplateField> fields = JCasUtil.selectCovered(TemplateField.class, record); for (TemplateField field : fields) { // only keep fields used in the template - simplifies later // begin/end calculation if (!touchedFields.contains(field.getName())) { continue; } recordFieldValues.put(field.getName(), field.getCoveredText()); recordFields.put(field.getName(), field); } } OptionalInt min = recordFields.values().stream().mapToInt(TemplateField::getBegin).min(); OptionalInt max = recordFields.values().stream().mapToInt(TemplateField::getEnd).max(); if (min.isPresent() && max.isPresent()) { int begin = min.getAsInt(); int end = max.getAsInt(); TemplateField newField = new TemplateField(jCas); newField.setName(fieldName); newField.setBegin(begin); newField.setEnd(end); newField.setValue(compiledTemplate.execute(recordFieldValues)); addToJCasIndex(newField); } } @Override public AnalysisEngineAction getAction() { return new AnalysisEngineAction(ImmutableSet.of(TemplateRecord.class, TemplateField.class), ImmutableSet.of(TemplateField.class)); } }