//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.regex;
import java.util.Collections;
import java.util.regex.Matcher;
import org.apache.uima.jcas.JCas;
import com.google.common.collect.ImmutableSet;
import uk.gov.dstl.baleen.annotators.regex.helpers.AbstractRegexAnnotator;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.types.common.CommsIdentifier;
/**
* Annotate IP (v4) Addresses within a document using a regular expression
*
* <p>The document content is run through a regular expression matcher looking for things that match the following IP address regular expression:</p>
* <pre>\\b(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.){3}(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\b</pre>
*
*
*/
public class IpV4 extends AbstractRegexAnnotator<CommsIdentifier> {
private static final String IPV4_REGEX = "\\b(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.){3}(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\b";
/**
* New instance.
*/
public IpV4() {
super(IPV4_REGEX, false, 1.0f);
}
@Override
protected CommsIdentifier create(JCas jCas, Matcher matcher) {
CommsIdentifier ipaddress = new CommsIdentifier(jCas);
ipaddress.setSubType("ipv4address");
return ipaddress;
}
@Override
public AnalysisEngineAction getAction() {
return new AnalysisEngineAction(Collections.emptySet(), ImmutableSet.of(CommsIdentifier.class));
}
}