package de.unihd.dbs.uima.annotator.heideltime.resources;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.TreeMap;
import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
/**
*
* This class manages all the RePattern resources.
* It reads the pattern resource files and stores, for every resource,
* one combined regular expression in a map keyed by the resource name.
* @author jannik stroetgen
*
*/
public class RePatternManager extends GenericResourceManager {
    /** One manager per language, created on first request by {@link #getInstance(Language)}. */
    protected static HashMap<Language, RePatternManager> instances = new HashMap<Language, RePatternManager>();

    /** Maps every pattern resource name to its (finalized) regular expression. */
    private TreeMap<String, String> hmAllRePattern;

    /**
     * Calls the parent constructor with the "repattern" resource type and the
     * given language, then reads every pattern resource the parent reports and
     * stores one regular expression per resource.
     * @param language value handed on to the parent constructor
     */
    private RePatternManager(String language) {
        super("repattern", language);

        hmAllRePattern = new TreeMap<String, String>();

        // resource name -> location passed to ClassLoader.getResourceAsStream
        HashMap<String, String> hmResourcesRePattern = readResourcesFromDirectory();
        // register every resource up front so it has an entry even if reading it fails
        for (String which : hmResourcesRePattern.keySet()) {
            hmAllRePattern.put(which, "");
        }
        readRePatternResources(hmResourcesRePattern);
    }

    /**
     * Returns the manager for the given language, creating and caching it on first use.
     * @param language language whose resource folder is used to build the manager
     * @return the cached RePatternManager for that language
     */
    public static RePatternManager getInstance(Language language) {
        RePatternManager manager = instances.get(language);
        if (manager == null) {
            manager = new RePatternManager(language.getResourceFolder());
            instances.put(language, manager);
        }
        return manager;
    }

    /**
     * Reads every pattern resource and turns its lines into one regular expression.
     * Lines starting with "//" and empty lines are skipped; all remaining lines of a
     * resource are joined with "|" as alternatives. A resource that cannot be read is
     * reported and keeps whatever lines were read before the failure; the remaining
     * resources are still processed and every pattern is finalized.
     * @param hmResourcesRePattern resource name mapped to the classpath location of its file
     */
    private void readRePatternResources(HashMap<String, String> hmResourcesRePattern) {
        for (String resource : hmResourcesRePattern.keySet()) {
            Logger.printDetail(component, "Adding pattern resource: "+resource);

            StringBuilder alternatives = new StringBuilder();
            BufferedReader in = null;
            try {
                in = new BufferedReader(new InputStreamReader(
                        this.getClass().getClassLoader().getResourceAsStream(hmResourcesRePattern.get(resource)),
                        "UTF-8"));
                for (String line; (line = in.readLine()) != null; ) {
                    if (line.startsWith("//") || line.equals("")) {
                        continue;
                    }
                    if (alternatives.length() > 0) {
                        alternatives.append('|');
                    }
                    alternatives.append(line);
                }
            } catch (IOException e) {
                Logger.printError(component, "Cannot read pattern resource "+resource+": "+e.getMessage());
                e.printStackTrace();
            } finally {
                if (in != null) {
                    try {
                        in.close();
                    } catch (IOException e) {
                        Logger.printError(component, "Cannot close pattern resource "+resource+": "+e.getMessage());
                    }
                }
            }
            hmAllRePattern.put(resource, alternatives.toString());
        }

        // Replacing the value of an existing key is not a structural change of the
        // TreeMap, so updating entries while iterating over the key set is safe.
        for (String which : hmAllRePattern.keySet()) {
            finalizeRePattern(which, hmAllRePattern.get(which));
        }
    }

    /**
     * Finalizes a pattern and stores it in hmAllRePattern under the given name:
     * groups become non-capturing, the whole expression is wrapped in one pair of
     * parentheses, and every backslash is doubled.
     * @param name key name
     * @param rePattern alternatives of the resource, joined with "|" (no leading "|")
     */
    private void finalizeRePattern(String name, String rePattern) {
        /* Reduces the danger of getting unusable groups from user-made repattern files
         * with group-producing parentheses (i.e. "(foo|bar)"): every "(" that is not
         * already followed by "?" becomes "(?:". The lookahead does not consume the next
         * character, so directly nested groups such as "((a|b)c)" are both rewritten.
         * NOTE(review): the character before "(" is not inspected, so an escaped literal
         * "\(" or a "(" inside a character class is rewritten as well. */
        rePattern = rePattern.replaceAll("\\((?!\\?)", "(?:");
        rePattern = "(" + rePattern + ")";
        // presumably needed because the result is later inserted as a regex
        // replacement string, where "\" is an escape character -- confirm against callers
        rePattern = rePattern.replaceAll("\\\\", "\\\\\\\\");

        hmAllRePattern.put(name, rePattern);
    }

    /**
     * proxy method to access the hmAllRePattern member
     * @param key key to check for
     * @return whether the map contains the key
     */
    public Boolean containsKey(String key) {
        return hmAllRePattern.containsKey(key);
    }

    /**
     * proxy method to access the hmAllRePattern member
     * @param key Key to retrieve data from
     * @return String from the map, or null if there is no entry for the key
     */
    public String get(String key) {
        return hmAllRePattern.get(key);
    }
}