package de.unihd.dbs.uima.annotator.heideltime.processors; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.uima.UimaContext; import org.apache.uima.cas.FSIterator; import org.apache.uima.jcas.JCas; import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger; import de.unihd.dbs.uima.types.heideltime.Timex3; /** * Addition to HeidelTime to recognize several (mostly, but not * entirely christian) holidays. * @author Hans-Peter Pfeiffer * */ public class HolidayProcessor extends GenericProcessor { /** * Constructor just calls the parent constructor here. */ public HolidayProcessor() { super(); } /** * not needed here */ public void initialize(UimaContext aContext) { return; } /** * all the functionality was put into evaluateCalculationFunctions(). */ public void process(JCas jcas) { evaluateCalculationFunctions(jcas); } /** * This function replaces function calls from the resource files with their TIMEX value. * * @author Hans-Peter Pfeiffer * @param jcas */ public void evaluateCalculationFunctions(JCas jcas) { // build up a list with all found TIMEX expressions List<Timex3> linearDates = new ArrayList<Timex3>(); FSIterator iterTimex = jcas.getAnnotationIndex(Timex3.type).iterator(); // Create List of all Timexes of types "date" and "time" while (iterTimex.hasNext()) { Timex3 timex = (Timex3) iterTimex.next(); if ((timex.getTimexType().equals("DATE")) || (timex.getTimexType().equals("TIME"))) { linearDates.add(timex); } } ////////////////////////////////////////////// // go through list of Date and Time timexes // ////////////////////////////////////////////// //compile regex pattern for validating commands/arguments Pattern cmd_p = Pattern.compile("((\\w\\w\\w\\w)-(\\w\\w)-(\\w\\w))\\s+funcDateCalc\\((\\w+)\\((.+)\\)\\)"); Pattern year_p = Pattern.compile("(\\d\\d\\d\\d)"); Pattern date_p = Pattern.compile("(\\d\\d\\d\\d)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])"); Matcher cmd_m; Matcher year_m; Matcher date_m; String date; String year; String month; String day; String function; String args[]; String valueNew; for (int i = 0; i < linearDates.size(); i++) { Timex3 t_i = (Timex3) linearDates.get(i); String value_i = t_i.getTimexValue(); cmd_m = cmd_p.matcher(value_i); valueNew = value_i; if(cmd_m.matches()) { date = cmd_m.group(1); year = cmd_m.group(2); month = cmd_m.group(3); day = cmd_m.group(4); function = cmd_m.group(5); args = cmd_m.group(6).split("\\s*,\\s*"); //replace keywords in function with actual values for(int j=0; j<args.length; j++) { args[j] = args[j].replace("DATE", date); args[j] = args[j].replace("YEAR", year); args[j] = args[j].replace("MONTH", month); args[j] = args[j].replace("DAY", day); } if(function.equals("EasterSunday")) { year_m = year_p.matcher(args[0]); //check if args[0] is a valid YEAR value if(year_m.matches()) { //System.err.println("correct format"); valueNew = this.getEasterSunday(Integer.valueOf(args[0]), Integer.valueOf(args[1])); } else{ Logger.printError("wrong format"); valueNew = "XXXX-XX-XX"; } } else if(function.equals("WeekdayRelativeTo")) { date_m = date_p.matcher(args[0]); //check if args[0] is a valid DATE value if(date_m.matches()) { //System.err.println("correct format"); valueNew = this.getWeekdayRelativeTo(args[0], Integer.valueOf(args[1]), Integer.valueOf(args[2]), Boolean.parseBoolean(args[3])); } else{ Logger.printError("wrong format"); valueNew = "XXXX-XX-XX"; } } else{ // if function call doesn't match any supported function Logger.printError("command not found"); valueNew = "XXXX-XX-XX"; } } t_i.removeFromIndexes(); t_i.setTimexValue(valueNew); t_i.addToIndexes(); linearDates.set(i, t_i); } } /** * Get the date of a day relative to Easter Sunday in a given year. Algorithm used is from the "Physikalisch-Technische Bundesanstalt Braunschweig" PTB. * * @author Hans-Peter Pfeiffer * @param year * @param days * @return date */ public String getEasterSunday(int year, int days) { int K = year / 100; int M = 15 + ( ( 3 * K + 3 ) / 4 ) - ( ( 8 * K + 13 ) / 25 ); int S = 2 - ( (3 * K + 3) / 4 ); int A = year % 19; int D = ( 19 * A + M ) % 30; int R = ( D / 29) + ( ( D / 28 ) - ( D / 29 ) * ( A / 11 ) ); int OG = 21 + D - R; int SZ = 7 - ( year + ( year / 4 ) + S ) % 7; int OE = 7 - ( OG - SZ ) % 7; int OS = OG + OE; SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); Calendar c = Calendar.getInstance(); String date; if( OS <= 31 ) { date = String.format("%04d-03-%02d", year, OS); } else{ date = String.format("%04d-04-%02d", year, ( OS - 31 ) ); } try{ c.setTime(formatter.parse(date)); c.add(Calendar.DAY_OF_MONTH, days); date = formatter.format(c.getTime()); } catch (ParseException e) { e.printStackTrace(); } return date; } /** * Get the date of Eastersunday in a given year * * @author Hans-Peter Pfeiffer * @param year * @return date */ public String getEasterSunday(int year) { return getEasterSunday(year, 0); } /** * Get the date of a weekday relative to a date, e.g. first Wednesday before 11-23 * * @author Hans-Peter Pfeiffer * @param date * @param weekday * @param number * @param count_itself * @return */ public String getWeekdayRelativeTo(String date, int weekday, int number, boolean count_itself) { SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); Calendar c = Calendar.getInstance(); int day; int add; if(number == 0) { try{ c.setTime(formatter.parse(date)); date = formatter.format(c.getTime()); } catch (ParseException e) { e.printStackTrace(); } return date; } else{ if(number<0) { number+=1; } try{ c.setTime(formatter.parse(date)); day = c.get(Calendar.DAY_OF_WEEK); if((count_itself && number>0) || (!count_itself && number <= 0)) { if(day<=weekday) { add = weekday - day; } else{ add = weekday - day + 7; } } else{ if(day<weekday) { add = weekday - day; } else{ add = weekday - day + 7; } } add += (( number - 1) * 7); c.add(Calendar.DAY_OF_MONTH, add); date = formatter.format(c.getTime()); } catch (ParseException e) { e.printStackTrace(); } return date; } } /** * Get the date of a the first, second, third etc. weekday in a month * * @author Hans-Peter Pfeiffer * @param number * @param weekday * @param month * @param year * @return date */ public String getWeekdayOfMonth(int number, int weekday, int month, int year) { return getWeekdayRelativeTo(String.format("%04d-%02d-01", year, month), weekday, number, true); } }