//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.cleaners;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Collections;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.resource.ResourceInitializationException;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import uk.gov.dstl.baleen.annotators.cleaners.helpers.AbstractNormalizeEntities;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.types.semantic.Entity;
import uk.gov.dstl.baleen.types.semantic.Temporal;
/**
 * Edits the value field of Temporal entities and sets it to a value in a given format.
 * The Temporal entities can be filtered by temporalType, and only those with a scope of
 * SINGLE and a timestamp that appears to have been set (i.e. start and stop are not both
 * zero) will be considered.
 *
 * The start timestamp will be used to produce the formatted value.
 *
 * @baleen.javadoc
 */
public class NormalizeTemporal extends AbstractNormalizeEntities {

	/**
	 * What is the format that the temporal entities should be normalized to? The default value follows the
	 * ISO8601 standard.
	 *
	 * @baleen.config yyyy'-'MM'-'dd'T'HH':'mm':'ss'Z'
	 */
	public static final String PARAM_DATE_FORMAT = "correctFormat";
	@ConfigurationParameter(name = PARAM_DATE_FORMAT, defaultValue = "yyyy'-'MM'-'dd'T'HH':'mm':'ss'Z'")
	String correctFormat;

	/**
	 * If set, then the temporal type of the Temporal entity must match this (case insensitive) to be normalized.
	 *
	 * @baleen.config
	 */
	public static final String PARAM_TEMPORAL_TYPE = "temporalType";
	@ConfigurationParameter(name = PARAM_TEMPORAL_TYPE, defaultValue = "")
	String type;

	/** Formatter built from {@link #correctFormat} in {@link #doInitialize(UimaContext)}; fixed to UTC. */
	DateTimeFormatter formatter;

	/**
	 * Builds the formatter from the configured pattern.
	 *
	 * @param aContext the UIMA context (not used directly here)
	 * @throws ResourceInitializationException if the configured pattern is not a valid
	 *         {@link DateTimeFormatter} pattern
	 */
	@Override
	public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
		try {
			// The zone is pinned to UTC so that an Instant can be formatted with calendar fields
			formatter = DateTimeFormatter.ofPattern(correctFormat).withZone(ZoneOffset.UTC);
		} catch (IllegalArgumentException iae) {
			getMonitor().error("Unable to parse correctFormat pattern", iae);
			throw new ResourceInitializationException(iae);
		}
	}

	/**
	 * Formats the start timestamp of the entity, interpreted as seconds since the epoch,
	 * using the configured formatter.
	 *
	 * @param e the entity to normalize; cast to Temporal without a check, so this
	 *          presumably relies on {@link #shouldNormalize(Entity)} having accepted it first
	 * @return the formatted start timestamp
	 */
	@Override
	protected String normalize(Entity e) {
		Temporal t = (Temporal) e;
		Instant start = Instant.ofEpochSecond(t.getTimestampStart());

		return formatter.format(start);
	}

	/**
	 * Only Temporal entities with a scope of SINGLE (case insensitive), a timestamp that
	 * appears to have been set, and a matching temporal type are normalized.
	 *
	 * @param e the candidate entity
	 * @return true if the entity should be normalized, false otherwise
	 */
	@Override
	protected boolean shouldNormalize(Entity e) {
		if (!(e instanceof Temporal)) {
			return false;
		}

		Temporal t = (Temporal) e;

		return "SINGLE".equalsIgnoreCase(t.getScope()) && isTimestampSet(t) && matchesType(t);
	}

	/**
	 * Return false if we suspect the timestamp hasn't been set on a Temporal object
	 * (i.e. both the start and end are equal to 0), or true otherwise.
	 *
	 * A single zero bound is not treated as unset, because a period that starts or ends
	 * exactly at the epoch (1970-01-01T00:00:00Z) legitimately has one bound equal to 0.
	 */
	private boolean isTimestampSet(Temporal t) {
		return t.getTimestampStart() != 0L || t.getTimestampStop() != 0L;
	}

	/**
	 * Return true if no temporal type filter has been configured, or if the configured type
	 * matches (case insensitive) the temporal type set on the Temporal object t.
	 * Return false otherwise.
	 */
	private boolean matchesType(Temporal t) {
		if (Strings.isNullOrEmpty(type)) {
			return true;
		}

		return type.equalsIgnoreCase(t.getTemporalType());
	}

	/**
	 * Describes this annotator for pipeline ordering: constructed with the Temporal type
	 * as the first set and an empty second set (presumably inputs then outputs - confirm
	 * against AnalysisEngineAction).
	 */
	@Override
	public AnalysisEngineAction getAction() {
		return new AnalysisEngineAction(ImmutableSet.of(Temporal.class), Collections.emptySet());
	}
}