//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.regex;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import uk.gov.dstl.baleen.annotators.regex.helpers.AbstractQuantityRegexAnnotator;
import uk.gov.dstl.baleen.uima.data.TextBlock;
/**
 * Annotate times within a document using regular expressions
 *
 * <p>The document content is searched for things that might represent time periods using regular
 * expressions. Any extracted times are normalized to seconds where possible (e.g. not months,
 * because the length of a month can vary). Years are assumed not to be leap years.</p>
 *
 * <p>Any hour quantities that could be times, e.g. 2200hrs, are ignored.</p>
 *
 * @baleen.javadoc
 */
public class TimeQuantity extends AbstractQuantityRegexAnnotator {
  /** Seconds in a 365-day (non-leap) year. */
  public static final int YEAR_TO_SECOND = 31536000;

  /** Seconds in a 7-day week. */
  public static final int WEEK_TO_SECOND = 604800;

  /** Seconds in a day. */
  public static final int DAY_TO_SECOND = 86400;

  /** Seconds in an hour. */
  public static final int HOUR_TO_SECOND = 3600;

  /** Seconds in a minute. */
  public static final int MINUTE_TO_SECOND = 60;

  // All unit patterns share the same shape: a number (digits, optionally with embedded commas)
  // in group 1, an optional single space, then the unit name with an optional plural "s".
  // Pattern instances are immutable and thread-safe, so they are compiled once per class.
  private static final Pattern YEAR_PATTERN =
      Pattern.compile(
          "\\b([0-9]+([0-9,]+[0-9])?)[ ]?(year|yr)(s)?\\b", Pattern.CASE_INSENSITIVE);

  private static final Pattern MONTH_PATTERN =
      Pattern.compile(
          "\\b([0-9]+([0-9,]+[0-9])?)[ ]?(month)(s)?\\b", Pattern.CASE_INSENSITIVE);

  private static final Pattern WEEK_PATTERN =
      Pattern.compile(
          "\\b([0-9]+([0-9,]+[0-9])?)[ ]?(week|wk)(s)?\\b", Pattern.CASE_INSENSITIVE);

  private static final Pattern DAY_PATTERN =
      Pattern.compile(
          "\\b([0-9]+([0-9,]+[0-9])?)[ ]?(day)(s)?\\b", Pattern.CASE_INSENSITIVE);

  private static final Pattern HOUR_PATTERN =
      Pattern.compile(
          "\\b([0-9]+([0-9,]+[0-9])?)[ ]?(hour|hr)(s)?\\b", Pattern.CASE_INSENSITIVE);

  private static final Pattern MINUTE_PATTERN =
      Pattern.compile(
          "\\b([0-9]+([0-9,]+[0-9])?)[ ]?(minute|min)(s)?\\b", Pattern.CASE_INSENSITIVE);

  private static final Pattern SECOND_PATTERN =
      Pattern.compile(
          "\\b([0-9]+([0-9,]+[0-9])?)[ ]?(second|sec)(s)?\\b", Pattern.CASE_INSENSITIVE);

  /**
   * Exactly four digits forming a valid 24-hour clock reading (hours 00-23, minutes 00-59). Used
   * to tell "2200hrs" (a time of day) apart from a genuine quantity of hours.
   */
  private static final Pattern CLOCK_TIME_PATTERN =
      Pattern.compile("([01][0-9]|2[0-3])[0-5][0-9]");

  /** Constructor */
  public TimeQuantity() {
    // NOTE(review): presumably the normalized unit ("s") and the quantity type ("time");
    // the meaning of these arguments is defined by the superclass - confirm there.
    super("s", "time");
  }

  /**
   * Searches the text covered by the block for each supported time unit in turn and hands the
   * matches on for annotation.
   *
   * @param block the text block to process
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void doProcessTextBlock(TextBlock block) throws AnalysisEngineProcessException {
    String text = block.getCoveredText();

    process(block, text, YEAR_PATTERN, "year", YEAR_TO_SECOND);
    // Months have no fixed length in seconds, so no real conversion factor is supplied.
    // NOTE(review): how a factor of 0 is interpreted is decided by the superclass - confirm.
    process(block, text, MONTH_PATTERN, "month", 0);
    process(block, text, WEEK_PATTERN, "week", WEEK_TO_SECOND);
    process(block, text, DAY_PATTERN, "day", DAY_TO_SECOND);
    // Hours need extra filtering, so they do not go through the generic process() path.
    processHours(block, text);
    process(block, text, MINUTE_PATTERN, "minute", MINUTE_TO_SECOND);
    process(block, text, SECOND_PATTERN, "s", 1);
  }

  /**
   * Adds a quantity for every hour expression in the text, except those whose number reads as a
   * 24-hour clock time (e.g. "2200hrs"), which are skipped.
   *
   * @param block the text block being processed
   * @param text the text covered by the block
   */
  private void processHours(TextBlock block, String text) {
    Matcher matcher = HOUR_PATTERN.matcher(text);
    while (matcher.find()) {
      if (isClockTime(matcher.group(1))) {
        continue;
      }

      addQuantity(block, matcher, "hour", HOUR_TO_SECOND);
    }
  }

  /**
   * Tests whether a matched number looks like a 24-hour clock time rather than a quantity.
   *
   * <p>The matched number may contain commas (e.g. "1,23"), so it is validated with a strict
   * four-digit pattern instead of being sliced and parsed with {@link Integer#parseInt(String)},
   * which would throw {@link NumberFormatException} on such input.</p>
   *
   * @param number the numeric part of an hour match
   * @return true if the number is exactly four digits with hours 00-23 and minutes 00-59
   */
  private static boolean isClockTime(String number) {
    return CLOCK_TIME_PATTERN.matcher(number).matches();
  }
}