package cmu.arktweetnlp.impl.features; import java.io.*; import java.util.*; import org.apache.commons.codec.language.Metaphone; import cmu.arktweetnlp.util.BasicFileIO; public class TagDictionary { public static Map<String, List<String>> WORD_TO_POS; static { WORD_TO_POS = null; try { WORD_TO_POS = loadData(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static TagDictionary instance() { return new TagDictionary(); } static Map<String, List<String>> loadData() throws IOException { // log.info("loading POS tag dictionary..."); Metaphone _metaphone = new Metaphone(); _metaphone.setMaxCodeLen(100); HashMap<String, List<String>> wordToPos = new HashMap<String, List<String>>(); BufferedReader in = BasicFileIO.getResourceReader("/cmu/arktweetnlp/tagdict.txt"); String line; try { while((line = in.readLine()) != null) { String[] parts = line.trim().split("\t"); if (parts.length != 2) { System.out.println(parts.length); System.out.println("wtf " + line.trim() + " | " + parts.length); continue; } String word = parts[0]; String poses = parts[1].trim(); ArrayList<String> arr = new ArrayList(); //new String[poses.length()]; for (int i=0; i < poses.length(); i++) { arr.add(poses.substring(i,i+1)); } wordToPos.put(word, Collections.unmodifiableList(arr)); } } catch (IOException e) { throw new RuntimeException(e); } return Collections.unmodifiableMap(wordToPos); } public static void main(String args[]) { instance(); } }