//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.regex; import java.time.DayOfWeek; import java.time.LocalDate; import java.time.Month; import java.time.Year; import java.time.YearMonth; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.Collection; import java.util.Collections; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.fit.util.JCasUtil; import com.google.common.collect.ImmutableSet; import uk.gov.dstl.baleen.annotators.helpers.DateTimeUtils; import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction; import uk.gov.dstl.baleen.types.metadata.Metadata; import uk.gov.dstl.baleen.types.semantic.Temporal; import uk.gov.dstl.baleen.uima.BaleenTextAwareAnnotator; import uk.gov.dstl.baleen.uima.data.TextBlock; /** * Extract expressions that refer to a relative date, e.g. yesterday. * These can be resolved by providing a metadata field to * check for the date that expressions are relative to. * * Supported expressions are of the form: * <ul> * <li>day before yesterday</li> * <li>yesterday</li> * <li>today</li> * <li>tomorrow</li> * <li>day after tomorrow</li> * <li>this week</li> * <li>this month</li> * <li>this year</li> * <li>next Wednesday</li> * <li>last Wednesday</li> * <li>last week</li> * <li>next week</li> * <li>in the last week</li> * <li>in the next week</li> * <li>Monday last week</li> * <li>Monday next week</li> * <li>last month</li> * <li>next month</li> * <li>in the last month</li> * <li>in the next month</li> * <li>last year</li> * <li>next year</li> * <li>October last year</li> * <li>October next year</li> * <li>in the last year</li> * <li>in the next year</li> * <li>in the last x days/weeks/months/years</li> * </ul> * * @baleen.javadoc */ public class RelativeDate extends BaleenTextAwareAnnotator { /** * The format of dates in the metadata fields * * @baleen.config yyyy-MM-dd */ public static final String PARAM_DATE_FORMAT = "dateFormat"; @ConfigurationParameter(name = PARAM_DATE_FORMAT, defaultValue="yyyy-MM-dd") private String dateFormat; /** * List of field names, in order of precedence, * to use when looking for a date to make other dates relative to * * @baleen.config date,documentDate */ public static final String PARAM_METADATA_FIELDS = "metadataFields"; @ConfigurationParameter(name = PARAM_METADATA_FIELDS, defaultValue={"date","documentDate"}) private String[] metadataFields; private static final String DAYS = "(Mon|Monday|Tue|Tues|Tuesday|Wed|Wednesday|Thu|Thurs|Thursday|Fri|Friday|Sat|Saturday|Sun|Sunday)"; private static final String MONTHS = "(January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|July|Jul|August|Aug|September|Sept|Sep|October|Oct|November|Nov|December|Dec)"; private static final String RELATIVE = "RELATIVE"; private static final String SINGLE = "SINGLE"; private static final String DATE = "DATE"; LocalDate relativeTo = null; @Override protected void doProcessTextBlock(TextBlock block) throws AnalysisEngineProcessException { relativeTo = null; DateTimeFormatter dtf = null; try{ dtf = DateTimeFormatter.ofPattern(dateFormat); }catch(IllegalArgumentException iae){ getMonitor().error("Invalid date format, no relative date will be set", iae); } if(dtf != null){ Collection<Metadata> md = JCasUtil.select(block.getJCas(), Metadata.class); for(String field : metadataFields){ for(Metadata m : md){ if(m.getKey().equals(field)){ try{ relativeTo = LocalDate.parse(m.getValue(), dtf); break; }catch(DateTimeParseException dtpe){ getMonitor().warn("Metadata field {} found, but content ({}) wasn't parseable", m.getKey(), m.getValue()); } } } if(relativeTo != null) break; } } yesterday(block); today(block); tomorrow(block); thisX(block); nextLastDay(block); nextLastWeek(block); nextLastMonth(block); nextLastYear(block); inTheNextLastX(block); } @Override public AnalysisEngineAction getAction() { return new AnalysisEngineAction(Collections.emptySet(), ImmutableSet.of(Temporal.class)); } private void yesterday(TextBlock block){ Pattern p = Pattern.compile("\\b(day before )?yesterday\\b", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(block.getCoveredText()); while(m.find()){ if(m.group(1) != null){ createRelativeDay(block, m.start(), m.end(), -2); }else{ createRelativeDay(block, m.start(), m.end(), -1); } } } private void today(TextBlock block){ Pattern p = Pattern.compile("\\btoday\\b", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(block.getCoveredText()); while(m.find()){ createRelativeDay(block, m.start(), m.end(), 0); } } private void tomorrow(TextBlock block){ Pattern p = Pattern.compile("\\b(day after )?tomorrow\\b", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(block.getCoveredText()); while(m.find()){ if(m.group(1) != null){ createRelativeDay(block, m.start(), m.end(), 2); }else{ createRelativeDay(block, m.start(), m.end(), 1); } } } private void thisX(TextBlock block){ Pattern p = Pattern.compile("\\bthis (week|month|year)\\b", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(block.getCoveredText()); while(m.find()){ if("week".equalsIgnoreCase(m.group(1))){ createRelativeWeek(block, m.start(), m.end(), 0); }else if("month".equalsIgnoreCase(m.group(1))){ createRelativeMonth(block, m.start(), m.end(), 0); }else if("year".equalsIgnoreCase(m.group(1))){ createRelativeYear(block, m.start(), m.end(), 0); } } } private void nextLastDay(TextBlock block){ Pattern p = Pattern.compile("\\b(next|last) "+DAYS+"\\b", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(block.getCoveredText()); while(m.find()){ Integer offset = null; if(relativeTo != null){ if("next".equalsIgnoreCase(m.group(1))){ for(int i = 1; i <= 7; i++){ if(relativeTo.plusDays(i).getDayOfWeek() == DateTimeUtils.asDay(m.group(2))){ offset = i; break; } } }else{ for(int i = 1; i <= 7; i++){ if(relativeTo.minusDays(i).getDayOfWeek() == DateTimeUtils.asDay(m.group(2))){ offset = -i; break; } } } } createRelativeDay(block, m.start(), m.end(), offset); } } private void nextLastWeek(TextBlock block){ Pattern p = Pattern.compile("\\b((in the|within the|"+DAYS+") )?(next|last) week\\b", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(block.getCoveredText()); while(m.find()){ if(m.group(3) != null){ if("next".equalsIgnoreCase(m.group(4))){ createRelativeWeekDay(block, m.start(), m.end(), 1, DateTimeUtils.asDay(m.group(3))); }else{ createRelativeWeekDay(block, m.start(), m.end(), -1, DateTimeUtils.asDay(m.group(3))); } }else if(m.group(2) != null){ if("next".equalsIgnoreCase(m.group(4))){ createRelativeWeekPeriod(block, m.start(), m.end(), 1); }else{ createRelativeWeekPeriod(block, m.start(), m.end(), -1); } }else{ if("next".equalsIgnoreCase(m.group(4))){ createRelativeWeek(block, m.start(), m.end(), 1); }else{ createRelativeWeek(block, m.start(), m.end(), -1); } } } } private void nextLastMonth(TextBlock block){ Pattern p = Pattern.compile("\\b((in the|within the) )?(next|last) month\\b", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(block.getCoveredText()); while(m.find()){ if(m.group(2) != null){ if("next".equalsIgnoreCase(m.group(3))){ createRelativeMonthPeriod(block, m.start(), m.end(), 1); }else{ createRelativeMonthPeriod(block, m.start(), m.end(), -1); } }else{ if("next".equalsIgnoreCase(m.group(3))){ createRelativeMonth(block, m.start(), m.end(), 1); }else{ createRelativeMonth(block, m.start(), m.end(), -1); } } } } private void nextLastYear(TextBlock block){ Pattern p = Pattern.compile("\\b((in the|within the|"+MONTHS+") )?(next|last) year\\b", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(block.getCoveredText()); while(m.find()){ if(m.group(3) != null){ if("next".equalsIgnoreCase(m.group(4))){ createRelativeYearMonth(block, m.start(), m.end(), 1, DateTimeUtils.asMonth(m.group(3))); }else{ createRelativeYearMonth(block, m.start(), m.end(), -1, DateTimeUtils.asMonth(m.group(3))); } }else if(m.group(2) != null){ if("next".equalsIgnoreCase(m.group(4))){ createRelativeYearPeriod(block, m.start(), m.end(), 1); }else{ createRelativeYearPeriod(block, m.start(), m.end(), -1); } }else{ if("next".equalsIgnoreCase(m.group(4))){ createRelativeYear(block, m.start(), m.end(), 1); }else{ createRelativeYear(block, m.start(), m.end(), -1); } } } } private void inTheNextLastX(TextBlock block){ Pattern p = Pattern.compile("\\b(in|within) the (next|last) (\\d+) (day|week|month|year)s\\b", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(block.getCoveredText()); while(m.find()){ Integer offset = Integer.parseInt(m.group(3)); if("last".equalsIgnoreCase(m.group(2))){ offset = -offset; } if("day".equalsIgnoreCase(m.group(4))){ createRelativeDayPeriod(block, m.start(), m.end(), offset); }else if("week".equalsIgnoreCase(m.group(4))){ createRelativeWeekPeriod(block, m.start(), m.end(), offset); }else if("month".equalsIgnoreCase(m.group(4))){ createRelativeMonthPeriod(block, m.start(), m.end(), offset); }else if("year".equalsIgnoreCase(m.group(4))){ createRelativeYearPeriod(block, m.start(), m.end(), offset); } } } private void createRelativeDayPeriod(TextBlock block, Integer charBegin, Integer charEnd, Integer dayOffset){ Temporal t = new Temporal(block.getJCas()); block.setBeginAndEnd(t, charBegin, charEnd); t.setConfidence(1.0); t.setPrecision(RELATIVE); t.setScope(SINGLE); t.setTemporalType(DATE); if(relativeTo != null && dayOffset != null){ if(dayOffset > 0){ t.setTimestampStart(relativeTo.atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(relativeTo.plusDays(dayOffset + 1L).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); }else{ t.setTimestampStart(relativeTo.plusDays(dayOffset).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(relativeTo.plusDays(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); } } addToJCasIndex(t); } private void createRelativeDay(TextBlock block, Integer charBegin, Integer charEnd, Integer dayOffset){ Temporal t = new Temporal(block.getJCas()); block.setBeginAndEnd(t, charBegin, charEnd); t.setConfidence(1.0); t.setPrecision(RELATIVE); t.setScope(SINGLE); t.setTemporalType(DATE); if(relativeTo != null && dayOffset != null){ LocalDate d = relativeTo.plusDays(dayOffset); t.setTimestampStart(d.atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(d.plusDays(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); } addToJCasIndex(t); } private void createRelativeWeekPeriod(TextBlock block, Integer charBegin, Integer charEnd, Integer weekOffset){ Temporal t = new Temporal(block.getJCas()); block.setBeginAndEnd(t, charBegin, charEnd); t.setConfidence(1.0); t.setPrecision(RELATIVE); t.setScope(SINGLE); t.setTemporalType(DATE); if(relativeTo != null && weekOffset != null){ if(weekOffset > 0){ t.setTimestampStart(relativeTo.atStartOfDay().toEpochSecond(ZoneOffset.UTC)); t.setTimestampStop(relativeTo.plusWeeks(weekOffset).plusDays(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); }else{ t.setTimestampStart(relativeTo.plusWeeks(weekOffset).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(relativeTo.plusDays(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); } } addToJCasIndex(t); } private void createRelativeWeek(TextBlock block, Integer charBegin, Integer charEnd, Integer weekOffset){ Temporal t = new Temporal(block.getJCas()); block.setBeginAndEnd(t, charBegin, charEnd); t.setConfidence(1.0); t.setPrecision(RELATIVE); t.setScope(SINGLE); t.setTemporalType(DATE); if(relativeTo != null && weekOffset != null){ LocalDate startOfWeek = relativeTo.plusWeeks(weekOffset); while(startOfWeek.getDayOfWeek() != DayOfWeek.MONDAY){ startOfWeek = startOfWeek.minusDays(1); } t.setTimestampStart(startOfWeek.atStartOfDay(ZoneOffset.UTC).toEpochSecond()); LocalDate endOfWeek = startOfWeek.plusWeeks(1); t.setTimestampStop(endOfWeek.atStartOfDay(ZoneOffset.UTC).toEpochSecond()); } addToJCasIndex(t); } private void createRelativeWeekDay(TextBlock block, Integer charBegin, Integer charEnd, Integer weekOffset, DayOfWeek day){ Temporal t = new Temporal(block.getJCas()); block.setBeginAndEnd(t, charBegin, charEnd); t.setConfidence(1.0); t.setPrecision(RELATIVE); t.setScope(SINGLE); t.setTemporalType(DATE); if(relativeTo != null && weekOffset != null){ LocalDate dayOfWeek = relativeTo.plusWeeks(weekOffset); while(dayOfWeek.getDayOfWeek() != DayOfWeek.MONDAY){ dayOfWeek = dayOfWeek.minusDays(1); } while(dayOfWeek.getDayOfWeek() != day){ dayOfWeek = dayOfWeek.plusDays(1); } t.setTimestampStart(dayOfWeek.atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(dayOfWeek.plusDays(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); } addToJCasIndex(t); } private void createRelativeMonthPeriod(TextBlock block, Integer charBegin, Integer charEnd, Integer monthOffset){ Temporal t = new Temporal(block.getJCas()); block.setBeginAndEnd(t, charBegin, charEnd); t.setConfidence(1.0); t.setPrecision(RELATIVE); t.setScope(SINGLE); t.setTemporalType(DATE); if(relativeTo != null && monthOffset != null){ if(monthOffset > 0){ t.setTimestampStart(relativeTo.atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(relativeTo.plusMonths(monthOffset).plusDays(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); }else{ t.setTimestampStart(relativeTo.plusMonths(monthOffset).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(relativeTo.plusDays(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); } } addToJCasIndex(t); } private void createRelativeMonth(TextBlock block, Integer charBegin, Integer charEnd, Integer monthOffset){ Temporal t = new Temporal(block.getJCas()); block.setBeginAndEnd(t, charBegin, charEnd); t.setConfidence(1.0); t.setPrecision(RELATIVE); t.setScope(SINGLE); t.setTemporalType(DATE); if(relativeTo != null && monthOffset != null){ YearMonth ym = YearMonth.from(relativeTo).plusMonths(monthOffset); t.setTimestampStart(ym.atDay(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(ym.plusMonths(1).atDay(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); } addToJCasIndex(t); } private void createRelativeYearPeriod(TextBlock block, Integer charBegin, Integer charEnd, Integer yearOffset){ Temporal t = new Temporal(block.getJCas()); block.setBeginAndEnd(t, charBegin, charEnd); t.setConfidence(1.0); t.setPrecision(RELATIVE); t.setScope(SINGLE); t.setTemporalType(DATE); if(relativeTo != null && yearOffset != null){ if(yearOffset > 0){ t.setTimestampStart(relativeTo.atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(relativeTo.plusYears(yearOffset).plusDays(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); }else{ t.setTimestampStart(relativeTo.plusYears(yearOffset).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(relativeTo.plusDays(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); } } addToJCasIndex(t); } private void createRelativeYear(TextBlock block, Integer charBegin, Integer charEnd, Integer yearOffset){ Temporal t = new Temporal(block.getJCas()); block.setBeginAndEnd(t, charBegin, charEnd); t.setConfidence(1.0); t.setPrecision(RELATIVE); t.setScope(SINGLE); t.setTemporalType(DATE); if(relativeTo != null && yearOffset != null){ Year y = Year.from(relativeTo).plusYears(yearOffset); t.setTimestampStart(y.atDay(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(y.plusYears(1).atDay(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); } addToJCasIndex(t); } private void createRelativeYearMonth(TextBlock block, Integer charBegin, Integer charEnd, Integer yearOffset, Month month){ Temporal t = new Temporal(block.getJCas()); block.setBeginAndEnd(t, charBegin, charEnd); t.setConfidence(1.0); t.setPrecision(RELATIVE); t.setScope(SINGLE); t.setTemporalType(DATE); if(relativeTo != null && yearOffset != null){ Year y = Year.from(relativeTo).plusYears(yearOffset); YearMonth ym = y.atMonth(month); t.setTimestampStart(ym.atDay(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); t.setTimestampStop(ym.atEndOfMonth().plusDays(1).atStartOfDay(ZoneOffset.UTC).toEpochSecond()); } addToJCasIndex(t); } }