package edu.stanford.nlp.tagger.maxent;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.*;
import java.util.Map;
import java.util.Set;
import edu.stanford.nlp.util.Generics;
/**
 * Lazily loaded, process-wide lookup table read from a GB18030-encoded text file.
 *
 * <p>Each usable line of the file has at least two space-separated fields: the first
 * field is a word and the second is a tag. The table maps every tag to the set of
 * words that were listed with it. The only query entry point is
 * {@link #getTag(String, String)}.
 *
 * <p>The table is loaded the first time {@link #getTag(String, String)} is called.
 * If the file cannot be opened or read, the failure is logged and the JVM is
 * terminated with exit status -1.
 */
public class ASBCunkDict {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(ASBCunkDict.class);

  /** Location of the dictionary file that is loaded on first use. */
  private static final String defaultFilename = "/u/nlp/data/pos-tagger/asbc_amb.fixed.gb18030";

  /** The single instance; created on demand by {@link #getInstance()}. */
  private static ASBCunkDict ASBCunkDictSingleton = null;

  /** Maps a tag (second field of a line) to the words (first field) listed with it. */
  private static Map<String, Set<String>> ASBCunk_dict;

  /**
   * Returns the singleton, constructing it (and thereby loading the dictionary file)
   * on the first call. Synchronized so that the file is loaded at most once.
   */
  private static synchronized ASBCunkDict getInstance() {
    if (ASBCunkDictSingleton == null) {
      ASBCunkDictSingleton = new ASBCunkDict();
    }
    return ASBCunkDictSingleton;
  }

  /** Loads the dictionary from {@link #defaultFilename}. */
  private ASBCunkDict() {
    readASBCunkDict(defaultFilename);
  }

  /**
   * Reads the dictionary file and replaces the static table with its contents.
   *
   * <p>Lines with fewer than two space-separated fields (for example blank lines)
   * are skipped. On a missing file or any other I/O error the problem is logged and
   * the JVM exits with status -1.
   *
   * @param filename path of the GB18030-encoded dictionary file
   */
  private static void readASBCunkDict(String filename) {
    // Build into a local map and publish it only once the whole file has been read,
    // so the static field never refers to a partially filled table.
    Map<String, Set<String>> loaded = Generics.newHashMap();
    // try-with-resources guarantees the reader (and the underlying stream) is closed
    // on both the normal and the exceptional path.
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(new FileInputStream(filename), "GB18030"))) {
      String line;
      while ((line = reader.readLine()) != null) {
        String[] fields = line.split(" ");
        if (fields.length < 2) {
          // Malformed or blank line: there is no tag field to index by.
          continue;
        }
        String tag = fields[1];
        Set<String> words = loaded.get(tag);
        if (words == null) {
          words = Generics.newHashSet();
          loaded.put(tag, words);
        }
        words.add(fields[0]);
      }
    } catch (FileNotFoundException e) {
      log.info("ASBCunk not found: " + filename + " (" + e + ")");
      System.exit(-1);
    } catch (IOException e) {
      log.info("ASBCunk: error reading " + filename + " (" + e + ")");
      System.exit(-1);
    }
    ASBCunk_dict = loaded;
  }

  /**
   * Tests whether a word was listed with a given tag in the dictionary file.
   *
   * @param a1 the tag to look up (the key of the table)
   * @param a2 the word to search for among the words listed with that tag
   * @return {@code "1"} if {@code a2} was listed with tag {@code a1};
   *         {@code "0"} if the tag is unknown or the word was not listed with it
   */
  protected static String getTag(String a1, String a2) {
    // Only called for its side effect: makes sure the dictionary has been loaded.
    getInstance();
    Set<String> words = get(a1);
    if (words != null && words.contains(a2)) {
      return "1";
    }
    return "0";
  }

  /**
   * Returns the words listed with tag {@code a}, or {@code null} if the tag does not
   * occur in the table. Must only be called after the dictionary has been loaded.
   */
  private static Set<String> get(String a) {
    return ASBCunk_dict.get(a);
  }

}//class