package edu.stanford.nlp.tagger.maxent;

import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.util.Generics;

import java.io.*;
import java.util.Map;
import java.util.Set;

/**
 * Lazily loaded dictionary that maps a POS tag to the set of words listed
 * with that tag in the {@code ctb_amb} dictionary file.
 *
 * <p>The file is read once, on the first call to
 * {@link #getTag(String, String)}. It is decoded as GB18030 and each line is
 * split on a single space: the first field is taken as the word and the
 * second field as the tag.
 *
 * <p>No synchronization is performed around the lazy initialization.
 */
public class CTBunkDict {

  /** Name of the dictionary file inside the dictionary directory. */
  private static final String defaultFilename = "ctb_amb";

  /** Lazily created instance; its construction triggers the file load. */
  private static CTBunkDict CTBunkDictSingleton = null;

  /** Tag -> set of words listed with that tag. */
  private static Map<String, Set<String>> CTBunk_dict;

  /**
   * Returns the singleton, creating it (and thereby loading the dictionary
   * file) on first use.
   */
  private static CTBunkDict getInstance() {
    if (CTBunkDictSingleton == null) {
      CTBunkDictSingleton = new CTBunkDict();
    }
    return CTBunkDictSingleton;
  }

  /** Loads the dictionary from its fixed location. */
  private CTBunkDict() {
    readCTBunkDict("/u/nlp/data/pos-tagger/dictionary" + "/" + defaultFilename);
  }

  /**
   * Replaces the tag-to-words table with the contents of the given file.
   *
   * @param filename Path of the GB18030-encoded dictionary file; each line is
   *                 expected to hold a word and a tag separated by one space
   * @throws RuntimeIOException if the file cannot be found or read
   */
  private static void readCTBunkDict(String filename) {
    CTBunk_dict = Generics.newHashMap();

    try {
      InputStream in = new FileInputStream(filename);
      try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "GB18030"));
        for (String line; (line = reader.readLine()) != null; ) {
          String[] fields = line.split(" ");
          if (fields.length < 2) {
            // Blank or incomplete line: there is no tag to index under, and
            // reading fields[1] would throw ArrayIndexOutOfBoundsException.
            continue;
          }
          String tag = fields[1];
          Set<String> words = CTBunk_dict.get(tag);
          if (words == null) {
            words = Generics.newHashSet();
            CTBunk_dict.put(tag, words);
          }
          words.add(fields[0]);
        }
      } finally {
        // Always release the file handle, including when reading fails.
        in.close();
      }
    } catch (FileNotFoundException e) {
      throw new RuntimeIOException("CTBunk file not found: " + filename, e);
    } catch (IOException e) {
      throw new RuntimeIOException("CTBunk I/O error: " + filename, e);
    }
  }

  /**
   * Returns "1" as true if the dictionary listed this word with this tag,
   * and "0" otherwise.
   *
   * @param tag  The POS tag
   * @param word The word
   * @return "1" as true if the dictionary listed this word with this tag,
   *         and "0" otherwise.
   */
  protected static String getTag(String tag, String word) {
    // Make sure the dictionary file has been loaded before the lookup.
    getInstance();
    Set<String> words = get(tag);
    if (words != null && words.contains(word)) {
      return "1";
    }
    return "0";
  }

  /**
   * Looks up the words recorded for a tag.
   *
   * @param a The POS tag
   * @return The set of words listed with that tag, or {@code null} if the tag
   *         does not occur in the dictionary
   */
  private static Set<String> get(String a) {
    return CTBunk_dict.get(a);
  }

} // end class CTBunkDict