//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.regex.helpers; import java.util.Collections; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.uima.jcas.JCas; import com.google.common.collect.ImmutableSet; import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction; import uk.gov.dstl.baleen.types.metadata.Metadata; /** Create a metadata annotator from a regular expression. * */ public abstract class AbstractMetadataRegexAnnotator extends AbstractRegexAnnotator<Metadata> { private final String key; private final int valueGroup; protected AbstractMetadataRegexAnnotator(Pattern regex, int matcherGroup, String key, int valueGroup) { super(regex, matcherGroup, 1.0); this.key = key; this.valueGroup = valueGroup; } /** New instance. * @param regex * @param matcherGroup * @param caseSensitive * @param confidence * @param key * @param valueGroup */ protected AbstractMetadataRegexAnnotator(String regex, int matcherGroup, boolean caseSensitive, String key, int valueGroup) { super(regex, matcherGroup, caseSensitive, 1.0); this.key = key; this.valueGroup = valueGroup; } /** Simplified constructor where entire regex is used and value is passed through. * @param regex * @param caseSensitive * @param key */ protected AbstractMetadataRegexAnnotator(String regex, boolean caseSensitive, String key) { this(regex, 0, caseSensitive, key, 0); } @Override protected Metadata create(JCas jCas, Matcher matcher) { Metadata md = new Metadata(jCas); md.setKey(key); String value = matcher.group(valueGroup); md.setValue(convertValue(value)); return md; } /** Convert the matcher value to the document info value. * @param value * @return */ protected String convertValue(String value) { return value; } @Override public AnalysisEngineAction getAction() { return new AnalysisEngineAction(Collections.emptySet(), ImmutableSet.of(Metadata.class)); } }