//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.regex;
import java.util.Collections;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.jcas.JCas;
import com.google.common.collect.ImmutableSet;
import uk.gov.dstl.baleen.annotators.regex.helpers.AbstractRegexNPAnnotator;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.types.military.MilitaryPlatform;
/**
* Extract military platforms (i.e. naval ships) that begin with HMS,
* or one of the variants used by other commonwealth states (e.g. HMCS for Canadian ships).
*
* This annotator uses Noun Phrases to detect the end of the phrase,
* and these must be present (e.g. language.OpenNLP should have been run prior).
*
* @baleen.javadoc
*/
public class Hms extends AbstractRegexNPAnnotator<MilitaryPlatform> {

    /**
     * Matches a ship prefix followed by a space and the remaining text.
     *
     * The prefix is "H", "M", an optional national designator (A, B, C, NZ, PNG, J or TS)
     * and a closing "S"; every letter may be followed by an optional full stop.
     * After the single space, the greedy ".*" consumes up to the last word boundary it can reach.
     */
    private static final Pattern HMS_PATTERN = Pattern.compile("\\bH(\\.)?M(\\.)?((A|B|C|N(\\.)?Z|P(\\.)?N(\\.)?G|J|T(\\.)?S)(\\.)?)?S(\\.)? .*\\b");

    /** Sub type stamped onto every platform this annotator creates. */
    private static final String NAVAL_SUB_TYPE = "NAVAL";

    /**
     * Creates the annotator, handing the HMS pattern to the superclass.
     *
     * NOTE(review): the trailing arguments (0 and 1.0) are presumably the matcher group
     * and the confidence used by the superclass - confirm against AbstractRegexNPAnnotator.
     */
    public Hms() {
        super(HMS_PATTERN, 0, 1.0);
    }

    /**
     * Describes this annotator for pipeline ordering: an empty set paired with a set
     * containing only {@link MilitaryPlatform}.
     *
     * NOTE(review): presumably the first set is the required inputs and the second the
     * produced outputs - confirm against AnalysisEngineAction.
     *
     * @return the action descriptor for this annotator
     */
    @Override
    public AnalysisEngineAction getAction() {
        return new AnalysisEngineAction(
                Collections.emptySet(),
                ImmutableSet.of(MilitaryPlatform.class));
    }

    /**
     * Builds the annotation for a match. The matcher itself is not consulted here;
     * only the sub type is set on the new annotation.
     *
     * @param jCas the CAS the new annotation is created in
     * @param matcher the matcher for the current hit (unused by this implementation)
     * @return a new {@link MilitaryPlatform} whose sub type is "NAVAL"
     */
    @Override
    protected MilitaryPlatform create(JCas jCas, Matcher matcher) {
        final MilitaryPlatform platform = new MilitaryPlatform(jCas);
        platform.setSubType(NAVAL_SUB_TYPE);
        return platform;
    }
}