package de.unihd.dbs.uima.annotator.heideltime.resources; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.HashMap; import java.util.regex.MatchResult; import java.util.regex.Pattern; import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger; import de.unihd.dbs.uima.annotator.heideltime.utilities.Toolbox; /** * * This class fills the role of a manager of all the rule resources. It reads * the data from a file system and fills up a bunch of HashMaps with their * information. * * @author jannik stroetgen * */ public class RuleManager extends GenericResourceManager { protected static HashMap<Language, RuleManager> instances = new HashMap<Language, RuleManager>(); // PATTERNS TO READ RESOURCES "RULES" AND "NORMALIZATION" Pattern paReadRules = Pattern .compile("RULENAME=\"(.*?)\",EXTRACTION=\"(.*?)\",NORM_VALUE=\"(.*?)\"(.*)"); // EXTRACTION PARTS OF RULES (patterns loaded from files) HashMap<Pattern, String> hmDatePattern = new HashMap<Pattern, String>(); HashMap<Pattern, String> hmDurationPattern = new HashMap<Pattern, String>(); HashMap<Pattern, String> hmTimePattern = new HashMap<Pattern, String>(); HashMap<Pattern, String> hmSetPattern = new HashMap<Pattern, String>(); // NORMALIZATION PARTS OF RULES (patterns loaded from files) HashMap<String, String> hmDateNormalization = new HashMap<String, String>(); HashMap<String, String> hmTimeNormalization = new HashMap<String, String>(); HashMap<String, String> hmDurationNormalization = new HashMap<String, String>(); HashMap<String, String> hmSetNormalization = new HashMap<String, String>(); // OFFSET PARTS OF RULES (patterns loaded from files) HashMap<String, String> hmDateOffset = new HashMap<String, String>(); HashMap<String, String> hmTimeOffset = new HashMap<String, String>(); HashMap<String, String> hmDurationOffset = new HashMap<String, String>(); HashMap<String, String> hmSetOffset = new HashMap<String, String>(); // QUANT PARTS OF RULES (patterns loaded from files) HashMap<String, String> hmDateQuant = new HashMap<String, String>(); HashMap<String, String> hmTimeQuant = new HashMap<String, String>(); HashMap<String, String> hmDurationQuant = new HashMap<String, String>(); HashMap<String, String> hmSetQuant = new HashMap<String, String>(); // FREQ PARTS OF RULES (patterns loaded from files) HashMap<String, String> hmDateFreq = new HashMap<String, String>(); HashMap<String, String> hmTimeFreq = new HashMap<String, String>(); HashMap<String, String> hmDurationFreq = new HashMap<String, String>(); HashMap<String, String> hmSetFreq = new HashMap<String, String>(); // MOD PARTS OF RULES (patterns loaded from files) HashMap<String, String> hmDateMod = new HashMap<String, String>(); HashMap<String, String> hmTimeMod = new HashMap<String, String>(); HashMap<String, String> hmDurationMod = new HashMap<String, String>(); HashMap<String, String> hmSetMod = new HashMap<String, String>(); // POS PARTS OF RULES (patterns loaded from files) HashMap<String, String> hmDatePosConstraint = new HashMap<String, String>(); HashMap<String, String> hmTimePosConstraint = new HashMap<String, String>(); HashMap<String, String> hmDurationPosConstraint = new HashMap<String, String>(); HashMap<String, String> hmSetPosConstraint = new HashMap<String, String>(); /** * Constructor calls the parent constructor that sets language/resource * parameters and collects rules resources. * * @param language * language of resources to be used */ private RuleManager(String language) { // Process Generic constructor with rules parameter super("rules", language); // ///////////////////////////////////////////////// // READ RULE RESOURCES FROM FILES AND STORE THEM // // ///////////////////////////////////////////////// HashMap<String, String> hmResourcesRules = readResourcesFromDirectory(); readRules(hmResourcesRules, language); } /** * singleton producer. * * @return singleton instance of RuleManager */ public static RuleManager getInstance(Language language) { if(!instances.containsKey(language)) { RuleManager nm = new RuleManager(language.getResourceFolder()); instances.put(language, nm); } return instances.get(language); } /** * READ THE RULES FROM THE FILES. The files have to be defined in the * HashMap hmResourcesRules. * * @param hmResourcesRules * rules to be interpreted */ public void readRules(HashMap<String, String> hmResourcesRules, String language) { try { for (String resource : hmResourcesRules.keySet()) { BufferedReader br = new BufferedReader(new InputStreamReader( this.getClass() .getClassLoader() .getResourceAsStream( hmResourcesRules.get(resource)))); Logger.printDetail(component, "Adding rule resource: " + resource); for (String line; (line = br.readLine()) != null;) { // skip comments or empty lines in resource files if (line.startsWith("//") || line.equals("")) continue; boolean correctLine = false; Logger.printDetail("DEBUGGING: reading rules..." + line); // check each line for the name, extraction, and // normalization part for (MatchResult r : Toolbox.findMatches(paReadRules, line)) { correctLine = true; String rule_name = r.group(1); String rule_extraction = r.group(2); String rule_normalization = r.group(3); String rule_offset = ""; String rule_quant = ""; String rule_freq = ""; String rule_mod = ""; String pos_constraint = ""; // ////////////////////////////////////////////////////////////////// // RULE EXTRACTION PARTS ARE TRANSLATED INTO REGULAR // EXPRESSSIONS // // ////////////////////////////////////////////////////////////////// // create pattern for rule extraction part Pattern paVariable = Pattern.compile("%(re[a-zA-Z0-9]*)"); RePatternManager rpm = RePatternManager.getInstance(Language.getLanguageFromString(language)); for (MatchResult mr : Toolbox.findMatches(paVariable, rule_extraction)) { Logger.printDetail("DEBUGGING: replacing patterns..." + mr.group()); if (!(rpm.containsKey(mr.group(1)))) { Logger.printError("Error creating rule:" + rule_name); Logger.printError("The following pattern used in this rule does not exist, does it? %" + mr.group(1)); System.exit(-1); } rule_extraction = rule_extraction.replaceAll("%" + mr.group(1), rpm.get(mr.group(1))); } rule_extraction = rule_extraction.replaceAll(" ", "[\\\\s]+"); Pattern pattern = null; try { pattern = Pattern.compile(rule_extraction); } catch (java.util.regex.PatternSyntaxException e) { Logger.printError("Compiling rules resulted in errors."); Logger.printError("Problematic rule is " + rule_name); Logger.printError("Cannot compile pattern: " + rule_extraction); e.printStackTrace(); System.exit(-1); } // Pattern pattern = Pattern.compile(rule_extraction); // /////////////////////////////////// // CHECK FOR ADDITIONAL CONSTRAINS // // /////////////////////////////////// if (!(r.group(4) == null)) { if (r.group(4).contains("OFFSET")) { Pattern paOffset = Pattern .compile("OFFSET=\"(.*?)\""); for (MatchResult ro : Toolbox.findMatches( paOffset, line)) { rule_offset = ro.group(1); } } if (r.group(4).contains("NORM_QUANT")) { Pattern paQuant = Pattern .compile("NORM_QUANT=\"(.*?)\""); for (MatchResult rq : Toolbox.findMatches( paQuant, line)) { rule_quant = rq.group(1); } } if (r.group(4).contains("NORM_FREQ")) { Pattern paFreq = Pattern .compile("NORM_FREQ=\"(.*?)\""); for (MatchResult rf : Toolbox.findMatches( paFreq, line)) { rule_freq = rf.group(1); } } if (r.group(4).contains("NORM_MOD")) { Pattern paMod = Pattern .compile("NORM_MOD=\"(.*?)\""); for (MatchResult rf : Toolbox.findMatches( paMod, line)) { rule_mod = rf.group(1); } } if (r.group(4).contains("POS_CONSTRAINT")) { Pattern paPos = Pattern .compile("POS_CONSTRAINT=\"(.*?)\""); for (MatchResult rp : Toolbox.findMatches( paPos, line)) { pos_constraint = rp.group(1); } } } // /////////////////////////////////////////// // READ DATE RULES AND MAKE THEM AVAILABLE // // /////////////////////////////////////////// if (resource.equals("daterules")) { // get extraction part hmDatePattern.put(pattern, rule_name); // get normalization part hmDateNormalization.put(rule_name, rule_normalization); // get offset part if (!(rule_offset.equals(""))) { hmDateOffset.put(rule_name, rule_offset); } // get quant part if (!(rule_quant.equals(""))) { hmDateQuant.put(rule_name, rule_quant); } // get freq part if (!(rule_freq.equals(""))) { hmDateFreq.put(rule_name, rule_freq); } // get mod part if (!(rule_mod.equals(""))) { hmDateMod.put(rule_name, rule_mod); } // get pos constraint part if (!(pos_constraint.equals(""))) { hmDatePosConstraint.put(rule_name, pos_constraint); } } // /////////////////////////////////////////////// // READ DURATION RULES AND MAKE THEM AVAILABLE // // /////////////////////////////////////////////// else if (resource.equals("durationrules")) { // get extraction part hmDurationPattern.put(pattern, rule_name); // get normalization part hmDurationNormalization.put(rule_name, rule_normalization); // get offset part if (!(rule_offset.equals(""))) { hmDurationOffset.put(rule_name, rule_offset); } // get quant part if (!(rule_quant.equals(""))) { hmDurationQuant.put(rule_name, rule_quant); } // get freq part if (!(rule_freq.equals(""))) { hmDurationFreq.put(rule_name, rule_freq); } // get mod part if (!(rule_mod.equals(""))) { hmDurationMod.put(rule_name, rule_mod); } // get pos constraint part if (!(pos_constraint.equals(""))) { hmDurationPosConstraint.put(rule_name, pos_constraint); } } // ////////////////////////////////////////// // READ SET RULES AND MAKE THEM AVAILABLE // // ////////////////////////////////////////// else if (resource.equals("setrules")) { // get extraction part hmSetPattern.put(pattern, rule_name); // get normalization part hmSetNormalization.put(rule_name, rule_normalization); // get offset part if (!rule_offset.equals("")) { hmSetOffset.put(rule_name, rule_offset); } // get quant part if (!rule_quant.equals("")) { hmSetQuant.put(rule_name, rule_quant); } // get freq part if (!rule_freq.equals("")) { hmSetFreq.put(rule_name, rule_freq); } // get mod part if (!rule_mod.equals("")) { hmSetMod.put(rule_name, rule_mod); } // get pos constraint part if (!pos_constraint.equals("")) { hmSetPosConstraint.put(rule_name, pos_constraint); } } // /////////////////////////////////////////// // READ TIME RULES AND MAKE THEM AVAILABLE // // /////////////////////////////////////////// else if (resource.equals("timerules")) { // get extraction part hmTimePattern.put(pattern, rule_name); // get normalization part hmTimeNormalization.put(rule_name, rule_normalization); // get offset part if (!rule_offset.equals("")) { hmTimeOffset.put(rule_name, rule_offset); } // get quant part if (!rule_quant.equals("")) { hmTimeQuant.put(rule_name, rule_quant); } // get freq part if (!rule_freq.equals("")) { hmTimeFreq.put(rule_name, rule_freq); } // get mod part if (!rule_mod.equals("")) { hmTimeMod.put(rule_name, rule_mod); } // get pos constraint part if (!pos_constraint.equals("")) { hmTimePosConstraint.put(rule_name, pos_constraint); } } else { Logger.printDetail(component, "Resource not recognized by HeidelTime: " + resource); } } // ///////////////////////////////////////// // CHECK FOR PROBLEMS WHEN READING RULES // // ///////////////////////////////////////// if (!correctLine) { Logger.printError(component, "Cannot read the following line of rule resource " + resource); Logger.printError(component, "Line: " + line); } } } } catch (IOException e) { e.printStackTrace(); } } public final HashMap<Pattern, String> getHmDatePattern() { return hmDatePattern; } public final HashMap<Pattern, String> getHmDurationPattern() { return hmDurationPattern; } public final HashMap<Pattern, String> getHmTimePattern() { return hmTimePattern; } public final HashMap<Pattern, String> getHmSetPattern() { return hmSetPattern; } public final HashMap<String, String> getHmDateNormalization() { return hmDateNormalization; } public final HashMap<String, String> getHmTimeNormalization() { return hmTimeNormalization; } public final HashMap<String, String> getHmDurationNormalization() { return hmDurationNormalization; } public final HashMap<String, String> getHmSetNormalization() { return hmSetNormalization; } public final HashMap<String, String> getHmDateOffset() { return hmDateOffset; } public final HashMap<String, String> getHmTimeOffset() { return hmTimeOffset; } public final HashMap<String, String> getHmDurationOffset() { return hmDurationOffset; } public final HashMap<String, String> getHmSetOffset() { return hmSetOffset; } public final HashMap<String, String> getHmDateQuant() { return hmDateQuant; } public final HashMap<String, String> getHmTimeQuant() { return hmTimeQuant; } public final HashMap<String, String> getHmDurationQuant() { return hmDurationQuant; } public final HashMap<String, String> getHmSetQuant() { return hmSetQuant; } public final HashMap<String, String> getHmDateFreq() { return hmDateFreq; } public final HashMap<String, String> getHmTimeFreq() { return hmTimeFreq; } public final HashMap<String, String> getHmDurationFreq() { return hmDurationFreq; } public final HashMap<String, String> getHmSetFreq() { return hmSetFreq; } public final HashMap<String, String> getHmDateMod() { return hmDateMod; } public final HashMap<String, String> getHmTimeMod() { return hmTimeMod; } public final HashMap<String, String> getHmDurationMod() { return hmDurationMod; } public final HashMap<String, String> getHmSetMod() { return hmSetMod; } public final HashMap<String, String> getHmDatePosConstraint() { return hmDatePosConstraint; } public final HashMap<String, String> getHmTimePosConstraint() { return hmTimePosConstraint; } public final HashMap<String, String> getHmDurationPosConstraint() { return hmDurationPosConstraint; } public final HashMap<String, String> getHmSetPosConstraint() { return hmSetPosConstraint; } }