//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.regex.helpers; import java.util.Collections; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.google.common.collect.ImmutableSet; import uk.gov.dstl.baleen.annotators.helpers.QuantityUtils; import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction; import uk.gov.dstl.baleen.types.common.Quantity; import uk.gov.dstl.baleen.uima.BaleenTextAwareAnnotator; import uk.gov.dstl.baleen.uima.data.TextBlock; /** * Abstract helper class for extracting quantities using Regex patterns, * and converting them into normalized quantities. */ public abstract class AbstractQuantityRegexAnnotator extends BaleenTextAwareAnnotator { private final String normalizedUnit; private final String type; /** * Constructor */ public AbstractQuantityRegexAnnotator(String normalizedUnit, String type){ this.normalizedUnit = normalizedUnit; this.type = type; } protected void process(TextBlock block, String text, Pattern pattern, String unit, double scale) { Matcher matcher = pattern.matcher(text); while(matcher.find()){ addQuantity(block, matcher, unit, scale); } } protected void addQuantity(TextBlock block, Matcher matcher, String unit, double scale) { Quantity quantity = QuantityUtils.createQuantity(block.getJCas(), matcher, unit, scale, normalizedUnit, type); if(quantity != null) { block.setBeginAndEnd(quantity, quantity.getBegin(), quantity.getEnd()); addToJCasIndex(quantity); } } @Override public AnalysisEngineAction getAction() { return new AnalysisEngineAction(Collections.emptySet(), ImmutableSet.of(Quantity.class)); } }