package org.myrobotlab.document.transformer;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.myrobotlab.logging.LoggerFactory;
import org.slf4j.Logger;
import au.com.bytecode.opencsv.CSVReader;
/**
* A singleton class to load a dictionary into the jvm that can be used across
* multiple instances of a pipeline stage.
*
* @author kwatters
*
*/
public class DictionaryLoader {
  public final static Logger log = LoggerFactory.getLogger(DictionaryLoader.class.getCanonicalName());
  // Lazily created shared instance; only touched inside the synchronized getInstance().
  private static DictionaryLoader instance = null;
  // csvFile -> map-of-values (first column of a row -> remaining columns of all rows with that key)
  private HashMap<String, HashMap<String, List<String>>> dictMap;

  /**
   * Creates an empty loader. Not public so that callers go through
   * {@link #getInstance()}.
   */
  protected DictionaryLoader() {
    dictMap = new HashMap<String, HashMap<String, List<String>>>();
  }

  /**
   * Returns the shared loader, creating it on first use.
   *
   * Synchronized so that two threads racing on the first call cannot each
   * create their own instance (and therefore their own dictionary cache).
   *
   * @return the single shared DictionaryLoader
   */
  public static synchronized DictionaryLoader getInstance() {
    if (instance == null) {
      instance = new DictionaryLoader();
    }
    return instance;
  }

  /**
   * Loads a CSV dictionary file, or returns the previously loaded copy.
   *
   * Each row is treated as a key (first column) followed by zero or more
   * values (remaining columns). Rows that share a key have their values
   * appended to the same list, in file order.
   *
   * The returned map is the cached instance itself, so every caller asking
   * for the same file name receives the same mutable object.
   *
   * @param fileName
   *          path of the CSV file; also used as the cache key
   * @return the key to values map, or null if the file does not exist (a
   *         missing file is not cached, so a later call will look again)
   * @throws IOException
   *           if the file cannot be opened or read
   */
  public synchronized HashMap<String, List<String>> loadDictionary(String fileName) throws IOException {
    // it's already loaded, just return it
    HashMap<String, List<String>> cached = dictMap.get(fileName);
    if (cached != null) {
      return cached;
    }
    File dictFile = new File(fileName);
    if (!dictFile.exists()) {
      log.warn("Dictionary file not found {}", dictFile.getAbsolutePath());
      return null;
    }
    // It's not loaded, load the file and put it in the dict map and return.
    HashMap<String, List<String>> dictionary = new HashMap<String, List<String>>();
    CSVReader reader = new CSVReader(new FileReader(fileName));
    try {
      String[] line;
      while ((line = reader.readNext()) != null) {
        // Guard against a row with no columns at all; there is no key to index.
        if (line.length == 0) {
          continue;
        }
        List<String> listVal = dictionary.get(line[0]);
        if (listVal == null) {
          listVal = new ArrayList<String>();
          dictionary.put(line[0], listVal);
        }
        for (int i = 1; i < line.length; i++) {
          listVal.add(line[i]);
        }
      }
      // Only cache once the whole file has been read successfully.
      dictMap.put(fileName, dictionary);
    } finally {
      // Always release the file handle, even when reading fails part way.
      reader.close();
    }
    return dictionary;
  }
}