package de.unihd.dbs.uima.annotator.heideltime.processors;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.uima.UimaContext;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;

import de.unihd.dbs.uima.types.heideltime.Timex3;

/**
 * Processor that replaces {@code decadeCalc(...)} function calls found in the
 * values of DATE timexes with a computed value.
 */
public class DecadeProcessor extends GenericProcessor {

    /**
     * Matches values of the form {@code XXXX-XX-XX decadeCalc(<digits>)}.
     * Group 1 is the four-character year part, group 4 the numeric argument.
     * Compiled once because the expression never changes.
     */
    private static final Pattern DECADE_CALC_PATTERN =
            Pattern.compile("(\\w\\w\\w\\w)-(\\w\\w)-(\\w\\w)\\s+decadeCalc\\((\\d+)\\)");

    /**
     * Constructor just calls the parent constructor here.
     */
    public DecadeProcessor() {
        super();
    }

    /**
     * Not needed here; this processor has nothing to initialize.
     *
     * @param aContext unused
     */
    public void initialize(UimaContext aContext) {
        return;
    }

    /**
     * Entry point; all the functionality lives in {@link #evaluateFunctions(JCas)}.
     *
     * @param jcas the CAS whose timexes are processed
     */
    public void process(JCas jcas) {
        evaluateFunctions(jcas);
    }

    /**
     * This function replaces function calls from the resource files with their TIMEX value.
     * <p>
     * Only timexes whose type is {@code "DATE"} are considered. A value such as
     * {@code "19XX-XX-XX decadeCalc(7)"} becomes {@code "197"}; values that do not
     * match the expected form are written back unchanged. Timexes without a type
     * or without a value are skipped instead of causing a NullPointerException.
     *
     * @author Hans-Peter Pfeiffer
     * @param jcas the CAS whose Timex3 annotations are rewritten in place
     */
    public void evaluateFunctions(JCas jcas) {
        // Collect the DATE timexes first so that the annotation index is not
        // modified while it is still being iterated.
        List<Timex3> dateTimexes = new ArrayList<Timex3>();
        FSIterator iterTimex = jcas.getAnnotationIndex(Timex3.type).iterator();
        while (iterTimex.hasNext()) {
            Timex3 timex = (Timex3) iterTimex.next();
            // Constant-first comparison: a timex without a type is simply ignored.
            if ("DATE".equals(timex.getTimexType())) {
                dateTimexes.add(timex);
            }
        }

        for (Timex3 timex : dateTimexes) {
            String value = timex.getTimexValue();
            if (value == null) {
                // Nothing to evaluate; leave the annotation untouched.
                continue;
            }
            String resolved = resolveDecadeValue(value);

            // Take the annotation out of the indexes while it is modified,
            // then put it back.
            timex.removeFromIndexes();
            timex.setTimexValue(resolved);
            timex.addToIndexes();
        }
    }

    /**
     * Evaluates a single timex value.
     *
     * @param value non-null timex value
     * @return the first two characters of the year part followed by the first
     *         digit of the {@code decadeCalc} argument if {@code value} is a
     *         decadeCalc call; otherwise {@code value} itself
     */
    private static String resolveDecadeValue(String value) {
        Matcher matcher = DECADE_CALC_PATTERN.matcher(value);
        if (!matcher.matches()) {
            return value;
        }
        String year = matcher.group(1);     // always exactly four characters
        String argument = matcher.group(4); // always at least one digit
        return year.substring(0, 2) + argument.substring(0, 1);
    }
}