//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.regex;
import java.time.DateTimeException;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import org.apache.uima.jcas.JCas;
import com.google.common.collect.ImmutableSet;
import uk.gov.dstl.baleen.annotators.helpers.DateTimeUtils;
import uk.gov.dstl.baleen.annotators.regex.helpers.AbstractRegexAnnotator;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.types.semantic.Temporal;
/**
 * Annotate DTG (Date Time Groups) within a document using regular expressions
 *
 * <p>The document content is run through a regular expression matcher looking for things that match the following regular expression:</p>
 * <pre>([0-9]{2})\s*([0-9]{2})([0-9]{2})([A-IK-Z]|D\*)\s*(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s*([0-9]{2})</pre>
 * <p>The capture groups are, in order: day of month, hour, minute, time zone code, month and two digit year.</p>
 * <p>Matched DTGs are parsed as a date and annotated as Temporal entities.</p>
 *
 * @baleen.javadoc
 */
public class Dtg extends AbstractRegexAnnotator<Temporal> {
  /** Read-only lookup from an upper-case time zone code to its UTC offset. */
  private static final Map<String, ZoneOffset> ZONE_OFFSETS = createTimeCodeMap();

  private static final String DATETIME_REGEX = "([0-9]{2})\\s*([0-9]{2})([0-9]{2})([A-IK-Z]|D\\*)\\s*(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\\s*([0-9]{2})";

  /**
   * New instance.
   */
  public Dtg() {
    super(DATETIME_REGEX, false, 1.0);
  }

  /**
   * Build a Temporal annotation from a single DTG match.
   *
   * <p>The two digit year is interpreted as 2000 + year. The start timestamp is the epoch second of
   * the matched minute in the matched time zone, and the stop timestamp is 60 seconds later.</p>
   *
   * @param jCas the CAS in which the annotation is created
   * @param matcher the matcher positioned on a DTG match
   * @return the new Temporal, or null if the matched fields do not form a valid date and time
   */
  @Override
  protected Temporal create(JCas jCas, Matcher matcher) {
    final long timestamp;
    try {
      ZonedDateTime zdt = ZonedDateTime.of(
          2000 + Integer.parseInt(matcher.group(6)),
          DateTimeUtils.asMonth(matcher.group(5)).getValue(),
          Integer.parseInt(matcher.group(1)),
          Integer.parseInt(matcher.group(2)),
          Integer.parseInt(matcher.group(3)),
          0, 0, militaryTimeCodeToOffset(matcher.group(4)));
      timestamp = zdt.toEpochSecond();
    } catch (DateTimeException dte) {
      // The regex only checks digit counts, so values such as day 32 or hour 25 end up here
      getMonitor().warn("Unable to parse DTG", dte);
      return null;
    }

    Temporal dtg = new Temporal(jCas);
    dtg.setPrecision("EXACT");
    dtg.setScope("SINGLE");
    dtg.setTemporalType("DATETIME");
    dtg.setTimestampStart(timestamp);
    // A DTG carries no seconds, so the annotation spans the whole matched minute
    dtg.setTimestampStop(timestamp + 60);
    return dtg;
  }

  /**
   * Create the table of time zone codes and their offsets from UTC.
   *
   * <p>A to M (skipping J) map to +1 to +12 hours, N to Y map to -1 to -12 hours and D* maps to
   * +4:30. Z has no entry and is resolved by the UTC default in
   * {@link #militaryTimeCodeToOffset(String)}.</p>
   *
   * @return an unmodifiable map keyed by upper-case time zone code
   */
  private static Map<String, ZoneOffset> createTimeCodeMap() {
    Map<String, ZoneOffset> map = new HashMap<>();
    map.put("A", ZoneOffset.ofHours(1));
    map.put("B", ZoneOffset.ofHours(2));
    map.put("C", ZoneOffset.ofHours(3));
    map.put("D", ZoneOffset.ofHours(4));
    map.put("D*", ZoneOffset.ofHoursMinutes(4, 30));
    map.put("E", ZoneOffset.ofHours(5));
    map.put("F", ZoneOffset.ofHours(6));
    map.put("G", ZoneOffset.ofHours(7));
    map.put("H", ZoneOffset.ofHours(8));
    map.put("I", ZoneOffset.ofHours(9));
    map.put("K", ZoneOffset.ofHours(10));
    map.put("L", ZoneOffset.ofHours(11));
    map.put("M", ZoneOffset.ofHours(12));
    map.put("N", ZoneOffset.ofHours(-1));
    map.put("O", ZoneOffset.ofHours(-2));
    map.put("P", ZoneOffset.ofHours(-3));
    map.put("Q", ZoneOffset.ofHours(-4));
    map.put("R", ZoneOffset.ofHours(-5));
    map.put("S", ZoneOffset.ofHours(-6));
    map.put("T", ZoneOffset.ofHours(-7));
    map.put("U", ZoneOffset.ofHours(-8));
    map.put("V", ZoneOffset.ofHours(-9));
    map.put("W", ZoneOffset.ofHours(-10));
    map.put("X", ZoneOffset.ofHours(-11));
    map.put("Y", ZoneOffset.ofHours(-12));
    // The table is shared static state, so hand out a read-only view
    return Collections.unmodifiableMap(map);
  }

  /**
   * Convert a time zone code into a UTC offset.
   *
   * @param timeCode the time zone code as matched in the text, in any letter case
   * @return the offset for the code, or UTC if the code has no entry in the table
   */
  private static ZoneOffset militaryTimeCodeToOffset(String timeCode) {
    // Upper-case with Locale.ROOT: the default-locale variant turns "i" into a dotted capital I
    // under Turkish/Azeri locales, which would miss the table and silently fall back to UTC.
    // NOTE(review): this matters if the base class matches case-insensitively (the 'false'
    // constructor argument looks like a case-sensitivity flag) - confirm.
    return ZONE_OFFSETS.getOrDefault(timeCode.toUpperCase(Locale.ROOT), ZoneOffset.UTC);
  }

  /**
   * Describe this annotator's inputs and outputs: no required inputs, Temporal as output.
   *
   * @return the action built from an empty input set and a set containing Temporal
   */
  @Override
  public AnalysisEngineAction getAction() {
    return new AnalysisEngineAction(Collections.emptySet(), ImmutableSet.of(Temporal.class));
  }
}