//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.regex;
import static org.junit.Assert.assertEquals;
import org.apache.uima.fit.util.JCasUtil;
import org.junit.Test;
import uk.gov.dstl.baleen.annotators.regex.Date;
import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest;
import uk.gov.dstl.baleen.annotators.testing.TestEntity;
import uk.gov.dstl.baleen.types.semantic.Temporal;
/**
 * Tests for the {@link Date} annotator, covering single dates, months, years and ranges of
 * each, all of which are expected to be emitted as {@link Temporal} annotations.
 *
 * <p>Expected timestamps are seconds since the Unix epoch, and the expected stop value is
 * exclusive: for example, the expected stop of the year 1996 equals the expected start of 1997.
 */
public class DateTest extends AbstractAnnotatorTest {

    public DateTest() {
        super(Date.class);
    }

    @Test
    public void testYears() throws Exception {
        processText("Woolworths was a retail chain from 1909-2008. We had very hot summers in 2009-11. 1969 was wet, as was the year '16.");

        assertTemporalCount(2);
        assertTemporal(0, "1909-2008", -1924992000L, 1230768000L);
        assertTemporal(1, "2009-11", 1230768000L, 1325376000L);
    }

    @Test
    public void testMonthYears() throws Exception {
        processText("From January to November 2015, not a lot happened. From December 15-January '16, Christmas happened.");

        assertTemporalCount(2);
        assertTemporal(0, "January to November 2015", 1420070400L, 1448928000L);
        assertTemporal(1, "December 15-January '16", 1448928000L, 1454284800L);
    }

    @Test
    public void testDayMonthYears() throws Exception {
        processText("He is on duty from 3-10 October 2016, whilst she was on duty 27th September - Monday 3 Oct 16. The Christmas break fell between 21st December 2016 and 2 January 17. On 2/3 January '17 there was a storm, and it rained on 2nd and 5th January 2017.");

        assertTemporalCount(6);
        assertTemporal(0, "3-10 October 2016", 1475452800L, 1476144000L);
        assertTemporal(1, "27th September - Monday 3 Oct 16", 1474934400L, 1475539200L);
        assertTemporal(2, "21st December 2016 and 2 January 17", 1482278400L, 1483401600L);
        assertTemporal(3, "2/3 January '17", 1483315200L, 1483488000L);

        // "2nd and 5th January 2017" yields two annotations: the first spans the whole phrase
        // but carries the value and timestamps of the 2nd, the second covers only the 5th.
        Temporal second = assertTemporal(4, "2nd and 5th January 2017", 1483315200L, 1483401600L);
        assertEquals("2nd January 2017", second.getValue());
        assertTemporal(5, "5th January 2017", 1483574400L, 1483660800L);
    }

    @Test
    public void testBadDayMonthYears() throws Exception {
        processText("She worked from 1st - 30th February 2015");

        // Month will be extracted, but the invalid date won't be
        assertTemporalCount(1);
        assertEquals("February 2015", temporalAt(0).getCoveredText());
    }

    @Test
    public void testDates() throws Exception {
        processText("Today is Tuesday 4th October 2016, or October 4 2016, or 2016-10-04, or maybe even 4/10/16.");

        // All four spellings denote the same day, so they share the same expected timestamps
        final long start = 1475539200L;
        final long stop = 1475625600L;

        assertTemporalCount(4);
        assertTemporal(0, "Tuesday 4th October 2016", start, stop);
        assertTemporal(1, "October 4 2016", start, stop);
        assertTemporal(2, "2016-10-04", start, stop);
        assertTemporal(3, "4/10/16", start, stop);
    }

    @Test
    public void testAmericanDates() throws Exception {
        jCas.setDocumentText("Is it 04/07/2017, or 07/04/2017? It could even be 23/12/2017!");
        processJCas(Date.PARAM_AMERICAN_FORMAT, true);

        assertTemporalCount(3);
        // 7th April in American dates
        assertTemporal(0, "04/07/2017", 1491523200L, 1491609600L);
        // 4th July in American dates
        assertTemporal(1, "07/04/2017", 1499126400L, 1499212800L);
        // Must be 23rd December
        assertTemporal(2, "23/12/2017", 1513987200L, 1514073600L);
    }

    @Test
    public void testMonth() throws Exception {
        assertSoleTemporal("It was during February 2015 that the event happened",
                "February 2015", 1422748800L, 1425168000L);

        jCas.reset();
        assertSoleTemporal("It was during early February 2015 that the event happened",
                "early February 2015", 1422748800L, 1423612800L);

        jCas.reset();
        assertSoleTemporal("It was during mid-February 2015 that the event happened",
                "mid-February 2015", 1423612800L, 1424476800L);

        jCas.reset();
        assertSoleTemporal("It was during Late February 2015 that the event happened",
                "Late February 2015", 1424476800L, 1425168000L);

        jCas.reset();
        assertSoleTemporal("It was at the end of February 2015 that the event happened",
                "end of February 2015", 1424649600L, 1425168000L);

        jCas.reset();
        assertSoleTemporal("It was at the beginning of February 2015 that the event happened",
                "beginning of February 2015", 1422748800L, 1423180800L);

        jCas.reset();
        assertSoleTemporal("It was during Feb. 2015 that the event happened",
                "Feb. 2015", 1422748800L, 1425168000L);
    }

    @Test
    public void testYear() throws Exception {
        processText("The year was 1997, which is the year after 1996 (a leap year). ABC1997, 1997ABC, and ABC1997ABC shouldn't be found!");

        assertTemporalCount(2);
        assertTemporal(0, "1997", 852076800L, 883612800L);
        assertTemporal(1, "1996", 820454400L, 852076800L);
    }

    // The following tests were from the old Date regex; they show that no capability was lost
    // with the rewrite.

    @Test
    public void testFull() throws Exception {
        assertSoleEntity("Today is Monday 25th February 2013.", "Monday 25th February 2013");
    }

    @Test
    public void testShortYear() throws Exception {
        assertSoleEntity("Today is Monday 25th February 13.", "Monday 25th February 13");
    }

    @Test
    public void testShortDay() throws Exception {
        assertSoleEntity("Today is Mon 25th February 2013.", "Mon 25th February 2013");
    }

    @Test
    public void testNoDay() throws Exception {
        assertSoleEntity("Today is 25th February 2013.", "25th February 2013");
    }

    @Test
    public void testNoSuffix() throws Exception {
        assertSoleEntity("Today is Monday 25 February 2013.", "Monday 25 February 2013");
    }

    @Test
    public void testInWord() throws Exception {
        assertSoleEntity("This is v2 Jul 2016.", "Jul 2016");
    }

    /**
     * Sets the document text on the shared JCas and runs the annotator with no parameters.
     *
     * @param text the document text to annotate
     * @throws Exception if processing the JCas fails
     */
    private void processText(String text) throws Exception {
        jCas.setDocumentText(text);
        processJCas();
    }

    /**
     * Returns the {@link Temporal} annotation at the given position in the JCas index.
     *
     * @param index zero-based position of the annotation
     * @return the annotation found at that position
     */
    private Temporal temporalAt(int index) {
        return JCasUtil.selectByIndex(jCas, Temporal.class, index);
    }

    /**
     * Asserts how many {@link Temporal} annotations the JCas currently holds.
     *
     * @param expected the expected number of annotations
     */
    private void assertTemporalCount(int expected) {
        assertEquals(expected, JCasUtil.select(jCas, Temporal.class).size());
    }

    /**
     * Asserts the covered text and timestamps of the {@link Temporal} at the given position.
     *
     * @param index zero-based position of the annotation
     * @param coveredText the expected covered text
     * @param start the expected start timestamp
     * @param stop the expected stop timestamp
     * @return the checked annotation, so that callers can make further assertions on it
     */
    private Temporal assertTemporal(int index, String coveredText, long start, long stop) {
        Temporal temporal = temporalAt(index);
        assertEquals(coveredText, temporal.getCoveredText());
        assertEquals(start, temporal.getTimestampStart());
        assertEquals(stop, temporal.getTimestampStop());
        return temporal;
    }

    /**
     * Processes the text and asserts that exactly one {@link Temporal} is produced, with the
     * given covered text and timestamps.
     *
     * @param text the document text to annotate
     * @param coveredText the expected covered text of the single annotation
     * @param start the expected start timestamp
     * @param stop the expected stop timestamp
     * @throws Exception if processing the JCas fails
     */
    private void assertSoleTemporal(String text, String coveredText, long start, long stop) throws Exception {
        processText(text);
        assertTemporalCount(1);
        assertTemporal(0, coveredText, start, stop);
    }

    /**
     * Processes the text and asserts, via {@code assertAnnotations}, that exactly one
     * {@link Temporal} is produced and that it matches the expected covered text.
     *
     * @param text the document text to annotate
     * @param coveredText the expected covered text of the single annotation
     * @throws Exception if processing the JCas fails
     */
    private void assertSoleEntity(String text, String coveredText) throws Exception {
        processText(text);
        assertAnnotations(1, Temporal.class,
                new TestEntity<>(0, coveredText)
        );
    }
}