package de.unihd.dbs.uima.annotator.heideltime.resources;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import de.unihd.dbs.uima.annotator.heideltime.utilities.*;
/**
*
* This class fills the role of a manager of all the Normalization resources.
* It reads the data from a file system and fills up a bunch of HashMaps
* with their information.
* @author jannik stroetgen
*
*/
public class NormalizationManager extends GenericResourceManager {
protected static HashMap<Language, NormalizationManager> instances = new HashMap<Language, NormalizationManager>();
// PATTERNS TO READ RESOURCES "RULES" AND "NORMALIZATION"
private Pattern paReadNormalizations = Pattern.compile("\"(.*?)\",\"(.*?)\"");
// STORE PATTERNS AND NORMALIZATIONS
private HashMap<String, RegexHashMap<String>> hmAllNormalization;
// ACCESS TO SOME NORMALIZATION MAPPINGS (set internally)
private HashMap<String, String> normDayInWeek;
private HashMap<String, String> normNumber;
private HashMap<String, String> normMonthName;
private HashMap<String, String> normMonthInSeason;
private HashMap<String, String> normMonthInQuarter;
/**
* Constructor calls the parent constructor that sets language/resource parameters,
* initializes basic and collects resource normalization patterns.
* @param language
*/
private NormalizationManager(String language) {
// calls the Generic constructor with normalization-parameter
super("normalization", language);
// initialize the data structures
hmAllNormalization = new HashMap<String, RegexHashMap<String>>();
normNumber = new HashMap<String, String>();
normDayInWeek = new HashMap<String, String>();
normMonthName = new HashMap<String, String>();
normMonthInSeason = new HashMap<String, String>();
normMonthInQuarter = new HashMap<String, String>();
// GLOBAL NORMALIZATION INFORMATION
readGlobalNormalizationInformation();
////////////////////////////////////////////////////////////
// READ NORMALIZATION RESOURCES FROM FILES AND STORE THEM //
////////////////////////////////////////////////////////////
HashMap<String, String> hmResourcesNormalization = readResourcesFromDirectory();
for (String which : hmResourcesNormalization.keySet()) {
hmAllNormalization.put(which, new RegexHashMap<String>());
}
readNormalizationResources(hmResourcesNormalization);
}
/**
* singleton producer.
* @return singleton instance of NormalizationManager
*/
public static NormalizationManager getInstance(Language language) {
if(!instances.containsKey(language)) {
NormalizationManager nm = new NormalizationManager(language.getResourceFolder());
instances.put(language, nm);
}
return instances.get(language);
}
/**
* Read the resources (of any language) from resource files and
* fill the HashMaps used for normalization tasks.
* @param hmResourcesNormalization normalization patterns to be interpreted
*/
public void readNormalizationResources(HashMap<String, String> hmResourcesNormalization) {
try {
for (String resource : hmResourcesNormalization.keySet()) {
Logger.printDetail(component, "Adding normalization resource: "+resource);
// create a buffered reader for every normalization resource file
BufferedReader in = new BufferedReader(new InputStreamReader
(this.getClass().getClassLoader().getResourceAsStream(hmResourcesNormalization.get(resource)),"UTF-8"));
for ( String line; (line=in.readLine()) != null; ) {
if (line.startsWith("//")) continue; // ignore comments
// check each line for the normalization format (defined in paReadNormalizations)
boolean correctLine = false;
for (MatchResult r : Toolbox.findMatches(paReadNormalizations, line)) {
correctLine = true;
String resource_word = r.group(1);
String normalized_word = r.group(2);
for (String which : hmAllNormalization.keySet()) {
if (resource.equals(which)) {
hmAllNormalization.get(which).put(resource_word,normalized_word);
}
}
if ((correctLine == false) && (!(line.matches("")))) {
Logger.printError("["+component+"] Cannot read one of the lines of normalization resource "+resource);
Logger.printError("["+component+"] Line: "+line);
}
}
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* sets a couple of rudimentary normalization parameters
*/
private void readGlobalNormalizationInformation() {
// MONTH IN QUARTER
normMonthInQuarter.put("01","1");
normMonthInQuarter.put("02","1");
normMonthInQuarter.put("03","1");
normMonthInQuarter.put("04","2");
normMonthInQuarter.put("05","2");
normMonthInQuarter.put("06","2");
normMonthInQuarter.put("07","3");
normMonthInQuarter.put("08","3");
normMonthInQuarter.put("09","3");
normMonthInQuarter.put("10","4");
normMonthInQuarter.put("11","4");
normMonthInQuarter.put("12","4");
// MONTH IN SEASON
normMonthInSeason.put("", "");
normMonthInSeason.put("01","WI");
normMonthInSeason.put("02","WI");
normMonthInSeason.put("03","SP");
normMonthInSeason.put("04","SP");
normMonthInSeason.put("05","SP");
normMonthInSeason.put("06","SU");
normMonthInSeason.put("07","SU");
normMonthInSeason.put("08","SU");
normMonthInSeason.put("09","FA");
normMonthInSeason.put("10","FA");
normMonthInSeason.put("11","FA");
normMonthInSeason.put("12","WI");
// DAY IN WEEK
normDayInWeek.put("sunday","1");
normDayInWeek.put("monday","2");
normDayInWeek.put("tuesday","3");
normDayInWeek.put("wednesday","4");
normDayInWeek.put("thursday","5");
normDayInWeek.put("friday","6");
normDayInWeek.put("saturday","7");
normDayInWeek.put("Sunday","1");
normDayInWeek.put("Monday","2");
normDayInWeek.put("Tuesday","3");
normDayInWeek.put("Wednesday","4");
normDayInWeek.put("Thursday","5");
normDayInWeek.put("Friday","6");
normDayInWeek.put("Saturday","7");
// normDayInWeek.put("sunday","7");
// normDayInWeek.put("monday","1");
// normDayInWeek.put("tuesday","2");
// normDayInWeek.put("wednesday","3");
// normDayInWeek.put("thursday","4");
// normDayInWeek.put("friday","5");
// normDayInWeek.put("saturday","6");
// normDayInWeek.put("Sunday","7");
// normDayInWeek.put("Monday","1");
// normDayInWeek.put("Tuesday","2");
// normDayInWeek.put("Wednesday","3");
// normDayInWeek.put("Thursday","4");
// normDayInWeek.put("Friday","5");
// normDayInWeek.put("Saturday","6");
// NORM MINUTE
normNumber.put("0","00");
normNumber.put("00","00");
normNumber.put("1","01");
normNumber.put("01","01");
normNumber.put("2","02");
normNumber.put("02","02");
normNumber.put("3","03");
normNumber.put("03","03");
normNumber.put("4","04");
normNumber.put("04","04");
normNumber.put("5","05");
normNumber.put("05","05");
normNumber.put("6","06");
normNumber.put("06","06");
normNumber.put("7","07");
normNumber.put("07","07");
normNumber.put("8","08");
normNumber.put("08","08");
normNumber.put("9","09");
normNumber.put("09","09");
normNumber.put("10","10");
normNumber.put("11","11");
normNumber.put("12","12");
normNumber.put("13","13");
normNumber.put("14","14");
normNumber.put("15","15");
normNumber.put("16","16");
normNumber.put("17","17");
normNumber.put("18","18");
normNumber.put("19","19");
normNumber.put("20","20");
normNumber.put("21","21");
normNumber.put("22","22");
normNumber.put("23","23");
normNumber.put("24","24");
normNumber.put("25","25");
normNumber.put("26","26");
normNumber.put("27","27");
normNumber.put("28","28");
normNumber.put("29","29");
normNumber.put("30","30");
normNumber.put("31","31");
normNumber.put("32","32");
normNumber.put("33","33");
normNumber.put("34","34");
normNumber.put("35","35");
normNumber.put("36","36");
normNumber.put("37","37");
normNumber.put("38","38");
normNumber.put("39","39");
normNumber.put("40","40");
normNumber.put("41","41");
normNumber.put("42","42");
normNumber.put("43","43");
normNumber.put("44","44");
normNumber.put("45","45");
normNumber.put("46","46");
normNumber.put("47","47");
normNumber.put("48","48");
normNumber.put("49","49");
normNumber.put("50","50");
normNumber.put("51","51");
normNumber.put("52","52");
normNumber.put("53","53");
normNumber.put("54","54");
normNumber.put("55","55");
normNumber.put("56","56");
normNumber.put("57","57");
normNumber.put("58","58");
normNumber.put("59","59");
normNumber.put("60","60");
// NORM MONTH
normMonthName.put("january","01");
normMonthName.put("february","02");
normMonthName.put("march","03");
normMonthName.put("april","04");
normMonthName.put("may","05");
normMonthName.put("june","06");
normMonthName.put("july","07");
normMonthName.put("august","08");
normMonthName.put("september","09");
normMonthName.put("october","10");
normMonthName.put("november","11");
normMonthName.put("december","12");
}
/*
* a bunch of getter methods to facilitate access to the data structures
*/
public final RegexHashMap<String> getFromHmAllNormalization(String key) {
return hmAllNormalization.get(key);
}
public final String getFromNormNumber(String key) {
return normNumber.get(key);
}
public final String getFromNormDayInWeek(String key) {
return normDayInWeek.get(key);
}
public final String getFromNormMonthName(String key) {
return normMonthName.get(key);
}
public final String getFromNormMonthInSeason(String key) {
return normMonthInSeason.get(key);
}
public final String getFromNormMonthInQuarter(String key) {
return normMonthInQuarter.get(key);
}
}