package edu.stanford.nlp.time; import java.io.Serializable; import java.util.Calendar; import java.util.Map; import java.util.regex.Pattern; import edu.stanford.nlp.util.Pair; import org.w3c.dom.Element; /** * Stores one TIMEX3 expression. This class is used for both TimeAnnotator and * GUTimeAnnotator for storing information for TIMEX3 tags. * * <p> * Example text with TIMEX3 annotation:<br> * <code>In Washington <TIMEX3 tid="t1" TYPE="DATE" VAL="PRESENT_REF" * temporalFunction="true" valueFromFunction="tf1" * anchorTimeID="t0">today</TIMEX3>, the Federal Aviation Administration * released air traffic control tapes from the night the TWA Flight eight * hundred went down. * </code> * <p> * <br> * TIMEX3 specification: * <br> * <pre><code> * attributes ::= tid type [functionInDocument] [beginPoint] [endPoint] * [quant] [freq] [temporalFunction] (value | valueFromFunction) * [mod] [anchorTimeID] [comment] * * tid ::= ID * {tid ::= TimeID * TimeID ::= t<integer>} * type ::= 'DATE' | 'TIME' | 'DURATION' | 'SET' * beginPoint ::= IDREF * {beginPoint ::= TimeID} * endPoint ::= IDREF * {endPoint ::= TimeID} * quant ::= CDATA * freq ::= Duration * functionInDocument ::= 'CREATION_TIME' | 'EXPIRATION_TIME' | 'MODIFICATION_TIME' | * 'PUBLICATION_TIME' | 'RELEASE_TIME'| 'RECEPTION_TIME' | * 'NONE' {default, if absent, is 'NONE'} * temporalFunction ::= 'true' | 'false' {default, if absent, is 'false'} * {temporalFunction ::= boolean} * value ::= Duration | Date | Time | WeekDate | WeekTime | Season | PartOfYear | PaPrFu * valueFromFunction ::= IDREF * {valueFromFunction ::= TemporalFunctionID * TemporalFunctionID ::= tf<integer>} * mod ::= 'BEFORE' | 'AFTER' | 'ON_OR_BEFORE' | 'ON_OR_AFTER' |'LESS_THAN' | 'MORE_THAN' | * 'EQUAL_OR_LESS' | 'EQUAL_OR_MORE' | 'START' | 'MID' | 'END' | 'APPROX' * anchorTimeID ::= IDREF * {anchorTimeID ::= TimeID} * comment ::= CDATA * </code></pre> * * <p> * References * <br> * Guidelines: <a href="http://www.timeml.org/tempeval2/tempeval2-trial/guidelines/timex3guidelines-072009.pdf"> * http://www.timeml.org/tempeval2/tempeval2-trial/guidelines/timex3guidelines-072009.pdf</a> * <br> * Specifications: <a href="http://www.timeml.org/site/publications/timeMLdocs/timeml_1.2.1.html#timex3"> * http://www.timeml.org/site/publications/timeMLdocs/timeml_1.2.1.html#timex3</a> * <br> * XSD: <a href="http://www.timeml.org/timeMLdocs/TimeML.xsd">http://www.timeml.org/timeMLdocs/TimeML.xsd</a> **/ public class Timex implements Serializable { private static final long serialVersionUID = 385847729549981302L; /** * XML representation of the TIMEX tag */ private String xml; /** * TIMEX3 value attribute - Time value (given in extended ISO 8601 format). */ private String val; /** * Alternate representation for time value (not part of TIMEX3 standard). * used when value of the time expression cannot be expressed as a standard TIMEX3 value. */ private String altVal; /** * Actual text that make up the time expression */ private String text; /** * TIMEX3 type attribute - Type of the time expression (DATE, TIME, DURATION, or SET) */ private String type; /** * TIMEX3 tid attribute - TimeID. ID to identify this time expression. * Should have the format of {@code t<integer>} */ private String tid; // TODO: maybe its easier if these are just strings... /** * TIMEX3 beginPoint attribute - integer indicating the TimeID of the begin time * that anchors this duration/range (-1 is not present). */ private int beginPoint; /** * TIMEX3 beginPoint attribute - integer indicating the TimeID of the end time * that anchors this duration/range (-1 is not present). */ private int endPoint; /** * Range begin/end/duration * (this is not part of the timex standard and is typically null, available if sutime.includeRange is true) */ private Range range; public static class Range implements Serializable { private static final long serialVersionUID = 1L; public String begin; public String end; public String duration; public Range(String begin, String end, String duration) { this.begin = begin; this.end = end; this.duration = duration; } } public String value() { return val; } public String altVal() { return altVal; } public String text() { return text; } public String timexType() { return type; } public String tid() { return tid; } public Range range() { return range; } public Timex() { } public Timex(Element element) { this.val = null; this.beginPoint = -1; this.endPoint = -1; /* * ByteArrayOutputStream os = new ByteArrayOutputStream(); Serializer ser = * new Serializer(os, "UTF-8"); ser.setIndent(2); // this is the default in * JDOM so let's keep the same ser.setMaxLength(0); // no line wrapping for * content ser.write(new Document(element)); */ init(element); } public Timex(String val) { this(null, val); } public Timex(String type, String val) { this.val = val; this.type = type; this.beginPoint = -1; this.endPoint = -1; this.xml = (val == null ? "<TIMEX3/>" : String.format("<TIMEX3 VAL=\"%s\" TYPE=\"%s\"/>", this.val, this.type)); } public Timex(String type, String val, String altVal, String tid, String text, int beginPoint, int endPoint) { this.type = type; this.val = val; this.altVal = altVal; this.tid = tid; this.text = text; this.beginPoint = beginPoint; this.endPoint = endPoint; } private void init(Element element) { init(XMLUtils.nodeToString(element, false), element); } private void init(String xml, Element element) { this.xml = xml; this.text = element.getTextContent(); // Mandatory attributes this.tid = XMLUtils.getAttribute(element, "tid"); this.val = XMLUtils.getAttribute(element, "VAL"); if (this.val == null) { this.val = XMLUtils.getAttribute(element, "value"); } this.altVal = XMLUtils.getAttribute(element, "alt_value"); this.type = XMLUtils.getAttribute(element, "type"); if (type == null) { this.type = XMLUtils.getAttribute(element, "TYPE"); } // if (this.type != null) { // this.type = this.type.intern(); // } // Optional attributes String beginPoint = XMLUtils.getAttribute(element, "beginPoint"); this.beginPoint = (beginPoint == null || beginPoint.length() == 0)? -1 : Integer.parseInt(beginPoint.substring(1)); String endPoint = XMLUtils.getAttribute(element, "endPoint"); this.endPoint = (endPoint == null || endPoint.length() == 0)? -1 : Integer.parseInt(endPoint.substring(1)); // Optional range String rangeStr = XMLUtils.getAttribute(element, "range"); if (rangeStr != null) { if (rangeStr.startsWith("(") && rangeStr.endsWith(")")) { rangeStr = rangeStr.substring(1, rangeStr.length()-1); } String[] parts = rangeStr.split(","); this.range = new Range(parts.length > 0? parts[0]:"", parts.length > 1? parts[1]:"", parts.length > 2? parts[2]:""); } } public int beginPoint() { return beginPoint; } public int endPoint() { return endPoint; } public String toString() { return this.xml; } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } Timex timex = (Timex) o; if (beginPoint != timex.beginPoint) { return false; } if (endPoint != timex.endPoint) { return false; } if (type != null ? !type.equals(timex.type) : timex.type != null) { return false; } if (val != null ? !val.equals(timex.val) : timex.val != null) { return false; } return true; } @Override public int hashCode() { int result = val != null ? val.hashCode() : 0; result = 31 * result + (type != null ? type.hashCode() : 0); result = 31 * result + beginPoint; result = 31 * result + endPoint; return result; } public Element toXmlElement() { Element element = XMLUtils.createElement("TIMEX3"); if (tid != null) { element.setAttribute("tid", tid); } if (value() != null) { element.setAttribute("value", val); } if (altVal != null) { element.setAttribute("altVal", altVal); } if (type != null) { element.setAttribute("type", type); } if (beginPoint != -1) { element.setAttribute("beginPoint", "t" + String.valueOf(beginPoint)); } if (endPoint != -1) { element.setAttribute("endPoint", "t" + String.valueOf(endPoint)); } if (text != null) { element.setTextContent(text); } return element; } // Used to create timex from XML (mainly for testing) public static Timex fromXml(String xml) { Element element = XMLUtils.parseElement(xml); if ("TIMEX3".equals(element.getNodeName())) { Timex t = new Timex(); // t.init(xml, element); // Doesn't preserve original input xml // Will reorder attributes of xml so can match xml of test timex and actual timex // (for which we can't control the order of the attributes now we don't use nu.xom...) t.init(element); return t; } else { throw new IllegalArgumentException("Invalid timex xml: " + xml); } } public static Timex fromMap(String text, Map<String, String> map) { try { Element element = XMLUtils.createElement("TIMEX3"); for (Map.Entry<String, String> entry : map.entrySet()) { if (entry.getValue() != null) { element.setAttribute(entry.getKey(), entry.getValue()); } } element.setTextContent(text); return new Timex(element); } catch (Exception ex) { throw new RuntimeException(ex); } } /** * Gets the Calendar matching the year, month and day of this Timex. * * @return The matching Calendar. */ public Calendar getDate() { if (Pattern.matches("\\d\\d\\d\\d-\\d\\d-\\d\\d", this.val)) { int year = Integer.parseInt(this.val.substring(0, 4)); int month = Integer.parseInt(this.val.substring(5, 7)); int day = Integer.parseInt(this.val.substring(8, 10)); return makeCalendar(year, month, day); } else if (Pattern.matches("\\d\\d\\d\\d\\d\\d\\d\\d", this.val)) { int year = Integer.parseInt(this.val.substring(0, 4)); int month = Integer.parseInt(this.val.substring(4, 6)); int day = Integer.parseInt(this.val.substring(6, 8)); return makeCalendar(year, month, day); } throw new UnsupportedOperationException(String.format("%s is not a fully specified date", this)); } /** * Gets two Calendars, marking the beginning and ending of this Timex's range. * * @return The begin point and end point Calendars. */ public Pair<Calendar, Calendar> getRange() { return this.getRange(null); } /** * Gets two Calendars, marking the beginning and ending of this Timex's range. * * @param documentTime * The time the document containing this Timex was written. (Not * necessary for resolving all Timex expressions. This may be * {@code null}, but then relative time expressions cannot be * resolved.) * @return The begin point and end point Calendars. */ public Pair<Calendar, Calendar> getRange(Timex documentTime) { if (this.val == null) { throw new UnsupportedOperationException("no value specified for " + this); } // YYYYMMDD or YYYYMMDDT... where the time is concatenated directly with the // date else if (val.length() >= 8 && Pattern.matches("\\d\\d\\d\\d\\d\\d\\d\\d", this.val.substring(0, 8))) { int year = Integer.parseInt(this.val.substring(0, 4)); int month = Integer.parseInt(this.val.substring(4, 6)); int day = Integer.parseInt(this.val.substring(6, 8)); return new Pair<>(makeCalendar(year, month, day), makeCalendar(year, month, day)); } // YYYY-MM-DD or YYYY-MM-DDT... else if (val.length() >= 10 && Pattern.matches("\\d\\d\\d\\d-\\d\\d-\\d\\d", this.val.substring(0, 10))) { int year = Integer.parseInt(this.val.substring(0, 4)); int month = Integer.parseInt(this.val.substring(5, 7)); int day = Integer.parseInt(this.val.substring(8, 10)); return new Pair<>(makeCalendar(year, month, day), makeCalendar(year, month, day)); } // YYYYMMDDL+ else if (Pattern.matches("\\d\\d\\d\\d\\d\\d\\d\\d[A-Z]+", this.val)) { int year = Integer.parseInt(this.val.substring(0, 4)); int month = Integer.parseInt(this.val.substring(4, 6)); int day = Integer.parseInt(this.val.substring(6, 8)); return new Pair<>(makeCalendar(year, month, day), makeCalendar(year, month, day)); } // YYYYMM or YYYYMMT... else if (val.length() >= 6 && Pattern.matches("\\d\\d\\d\\d\\d\\d", this.val.substring(0, 6))) { int year = Integer.parseInt(this.val.substring(0, 4)); int month = Integer.parseInt(this.val.substring(4, 6)); Calendar begin = makeCalendar(year, month, 1); int lastDay = begin.getActualMaximum(Calendar.DATE); Calendar end = makeCalendar(year, month, lastDay); return new Pair<>(begin, end); } // YYYY-MM or YYYY-MMT... else if (val.length() >= 7 && Pattern.matches("\\d\\d\\d\\d-\\d\\d", this.val.substring(0, 7))) { int year = Integer.parseInt(this.val.substring(0, 4)); int month = Integer.parseInt(this.val.substring(5, 7)); Calendar begin = makeCalendar(year, month, 1); int lastDay = begin.getActualMaximum(Calendar.DATE); Calendar end = makeCalendar(year, month, lastDay); return new Pair<>(begin, end); } // YYYY or YYYYT... else if (val.length() >= 4 && Pattern.matches("\\d\\d\\d\\d", this.val.substring(0, 4))) { int year = Integer.parseInt(this.val.substring(0, 4)); return new Pair<>(makeCalendar(year, 1, 1), makeCalendar(year, 12, 31)); } // PDDY if (Pattern.matches("P\\d+Y", this.val) && documentTime != null) { Calendar rc = documentTime.getDate(); int yearRange = Integer.parseInt(this.val.substring(1, this.val.length() - 1)); // in the future if (this.beginPoint < this.endPoint) { Calendar start = copyCalendar(rc); Calendar end = copyCalendar(rc); end.add(Calendar.YEAR, yearRange); return new Pair<>(start, end); } // in the past else if (this.beginPoint > this.endPoint) { Calendar start = copyCalendar(rc); Calendar end = copyCalendar(rc); start.add(Calendar.YEAR, 0 - yearRange); return new Pair<>(start, end); } throw new RuntimeException("begin and end are equal " + this); } // PDDM if (Pattern.matches("P\\d+M", this.val) && documentTime != null) { Calendar rc = documentTime.getDate(); int monthRange = Integer.parseInt(this.val.substring(1, this.val.length() - 1)); // in the future if (this.beginPoint < this.endPoint) { Calendar start = copyCalendar(rc); Calendar end = copyCalendar(rc); end.add(Calendar.MONTH, monthRange); return new Pair<>(start, end); } // in the past if (this.beginPoint > this.endPoint) { Calendar start = copyCalendar(rc); Calendar end = copyCalendar(rc); start.add(Calendar.MONTH, 0 - monthRange); return new Pair<>(start, end); } throw new RuntimeException("begin and end are equal " + this); } // PDDD if (Pattern.matches("P\\d+D", this.val) && documentTime != null) { Calendar rc = documentTime.getDate(); int dayRange = Integer.parseInt(this.val.substring(1, this.val.length() - 1)); // in the future if (this.beginPoint < this.endPoint) { Calendar start = copyCalendar(rc); Calendar end = copyCalendar(rc); end.add(Calendar.DAY_OF_MONTH, dayRange); return new Pair<>(start, end); } // in the past if (this.beginPoint > this.endPoint) { Calendar start = copyCalendar(rc); Calendar end = copyCalendar(rc); start.add(Calendar.DAY_OF_MONTH, 0 - dayRange); return new Pair<>(start, end); } throw new RuntimeException("begin and end are equal " + this); } // YYYYSP if (Pattern.matches("\\d+SP", this.val)) { int year = Integer.parseInt(this.val.substring(0, 4)); Calendar start = makeCalendar(year, 2, 1); Calendar end = makeCalendar(year, 4, 31); return new Pair<>(start, end); } // YYYYSU if (Pattern.matches("\\d+SU", this.val)) { int year = Integer.parseInt(this.val.substring(0, 4)); Calendar start = makeCalendar(year, 5, 1); Calendar end = makeCalendar(year, 7, 31); return new Pair<>(start, end); } // YYYYFA if (Pattern.matches("\\d+FA", this.val)) { int year = Integer.parseInt(this.val.substring(0, 4)); Calendar start = makeCalendar(year, 8, 1); Calendar end = makeCalendar(year, 10, 31); return new Pair<>(start, end); } // YYYYWI if (Pattern.matches("\\d+WI", this.val)) { int year = Integer.parseInt(this.val.substring(0, 4)); Calendar start = makeCalendar(year, 11, 1); Calendar end = makeCalendar(year + 1, 1, 29); return new Pair<>(start, end); } // YYYYWDD if (Pattern.matches("\\d\\d\\d\\dW\\d+", this.val)) { int year = Integer.parseInt(this.val.substring(0, 4)); int week = Integer.parseInt(this.val.substring(5)); int startDay = (week - 1) * 7; int endDay = startDay + 6; Calendar start = makeCalendar(year, startDay); Calendar end = makeCalendar(year, endDay); return new Pair<>(start, end); } // PRESENT_REF if (this.val.equals("PRESENT_REF")) { Calendar rc = documentTime.getDate(); // todo: This case doesn't check for documentTime being null and will NPE Calendar start = copyCalendar(rc); Calendar end = copyCalendar(rc); return new Pair<>(start, end); } throw new RuntimeException(String.format("unknown value \"%s\" in %s", this.val, this)); } private static Calendar makeCalendar(int year, int month, int day) { Calendar date = Calendar.getInstance(); date.clear(); date.set(year, month - 1, day, 0, 0, 0); return date; } private static Calendar makeCalendar(int year, int dayOfYear) { Calendar date = Calendar.getInstance(); date.clear(); date.set(Calendar.YEAR, year); date.set(Calendar.DAY_OF_YEAR, dayOfYear); return date; } private static Calendar copyCalendar(Calendar c) { Calendar date = Calendar.getInstance(); date.clear(); date.set(c.get(Calendar.YEAR), c.get(Calendar.MONTH), c.get(Calendar.DAY_OF_MONTH), c.get(Calendar.HOUR_OF_DAY), c .get(Calendar.MINUTE), c.get(Calendar.SECOND)); return date; } }