package de.unihd.dbs.uima.annotator.heideltime.processors; import java.util.HashSet; import java.util.regex.MatchResult; import java.util.regex.Pattern; import org.apache.uima.cas.FSIterator; import org.apache.uima.jcas.JCas; import de.unihd.dbs.uima.annotator.heideltime.utilities.Toolbox; import de.unihd.dbs.uima.types.heideltime.Timex3; import de.unihd.dbs.uima.types.heideltime.Timex3Interval; /** * * This class removes TIMEX3 annotations for temponyms and adds * TIMEX3INTERVAL annotations containing (earliest|latest)(Begin|End) information. * @author jannik stroetgen * */ public class TemponymPostprocessing { public static void handleIntervals(JCas jcas){ HashSet<Timex3> timexes = new HashSet<>(); // iterate over all TEMPONYMS FSIterator iterTimex = jcas.getAnnotationIndex(Timex3.type).iterator(); while (iterTimex.hasNext()) { Timex3 t = (Timex3) iterTimex.next(); if (t.getTimexType().equals("TEMPONYM")) { // create a timex3interval for each temponym Timex3Interval ti = new Timex3Interval(jcas); System.err.println("TEMPONYM: " + t.getCoveredText()); ti.setBegin(t.getBegin()); ti.setEnd(t.getEnd()); ti.setTimexType(t.getTimexType()); ti.setAllTokIds(t.getAllTokIds()); ti.setTimexFreq(t.getTimexFreq()); ti.setTimexMod(t.getTimexMod()); ti.setTimexQuant(t.getTimexQuant()); // set a new id String id = t.getTimexId(); int newId = Integer.parseInt(id.replace("t", "")); newId += 100000; ti.setTimexId("t" + newId); // get the (earliest|last)(begin|end) information Pattern p = Pattern.compile("\\[(.*?), (.*?), (.*?), (.*?)\\]"); for (MatchResult mr : Toolbox.findMatches(p,t.getTimexValue())) { ti.setTimexValueEB(mr.group(1)); ti.setTimexValueLB(mr.group(2)); ti.setTimexValueEE(mr.group(3)); ti.setTimexValueLE(mr.group(4)); } //System.err.println("temponym: " + t.getTimexValue()); if ((ti.getTimexValueEB() == ti.getTimexValueLB()) && (ti.getTimexValueLB() == ti.getTimexValueEE()) && (ti.getTimexValueEE() == ti.getTimexValueLE())) { ti.setTimexValue(ti.getTimexValueEB()); t.setTimexValue(ti.getTimexValueEB()); } else { // what's the best single value for an interval!? t.setEmptyValue(t.getTimexValue()); ti.setTimexValue(ti.getTimexValueLE()); t.setTimexValue(ti.getTimexValueLE()); } ti.setFoundByRule(t.getFoundByRule()); ti.addToIndexes(); timexes.add(t); } } // shall the standard timexes really be removed? for (Timex3 t : timexes){ t.removeFromIndexes(); } } }