package edu.stanford.nlp.international.arabic;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.*;
import java.util.*;
import edu.stanford.nlp.international.arabic.pipeline.*;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.trees.treebank.Mapper;
import edu.stanford.nlp.util.Generics;
/**
 * A singleton class backed by a map between words and stems. The present input format is
 * the same as that used by the Arabic subject detector: one entry per line, tab-separated,
 * with the word in the first field and the stem in the eighth field.
 * <p>
 * Only {@link #getInstance()} is synchronized; {@link #load(String)} and the lookup methods
 * operate on a plain map without additional locking.
 *
 * @author Spence Green
 */
public class ArabicVerbStemBank {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(ArabicVerbStemBank.class);

  /** Minimum number of tab-separated fields a line needs so that the stem field exists. */
  private static final int MIN_FIELDS = 8;

  /** Index of the field holding the surface word. */
  private static final int WORD_FIELD = 0;

  /** Index of the field holding the stem. */
  private static final int STEM_FIELD = 7;

  /** Suffix stripped from the raw stem field; compiled once instead of once per line. */
  private static final java.util.regex.Pattern STEM_SUFFIX =
      java.util.regex.Pattern.compile("[_|-].*\\d$");

  /** File read by {@link #main(String[])} when no argument is given. */
  private static final String DEFAULT_TEST_FILE = "e.test";

  private static ArabicVerbStemBank thisInstance = null;

  private final Map<String,String> verbStems;
  private final Buckwalter b2a;
  private final Mapper lexMapper;

  private ArabicVerbStemBank() {
    verbStems = Generics.newHashMap();
    b2a = new Buckwalter();
    lexMapper = new DefaultLexicalMapper();
  }

  /**
   * Returns the single shared instance, creating it on first use.
   *
   * @return the shared stem bank
   */
  public synchronized static ArabicVerbStemBank getInstance() {
    if (thisInstance == null) {
      thisInstance = new ArabicVerbStemBank();
    }
    return thisInstance;
  }

  /**
   * Looks up the stem recorded for a word.
   *
   * @param word the word to look up
   * @return the stored stem if the word has an entry, otherwise {@code word} itself
   */
  public String getStem(String word) {
    return verbStems.getOrDefault(word, word);
  }

  /**
   * Reads word/stem pairs from a tab-separated file and adds them to this bank. The first
   * entry seen for a word wins; later entries for the same word are ignored. Lines with too
   * few fields are skipped with a warning instead of aborting the load.
   * <p>
   * I/O failures are logged and otherwise swallowed, so this method does not throw on a
   * missing or unreadable file; entries read before the failure are kept.
   *
   * @param filename location of the stem file, as accepted by {@code IOUtils.readerFromString}
   */
  public void load(String filename) {
    // try-with-resources so the reader is closed on both the success and the failure path.
    try (BufferedReader br = IOUtils.readerFromString(filename)) {
      int lineNumber = 0;
      // readLine() returning null is the reliable end-of-input signal; ready() only says
      // whether a read would block and can end the loop early.
      for (String line; (line = br.readLine()) != null; ) {
        lineNumber++;
        String[] toks = line.split("\\t");
        if (toks.length < MIN_FIELDS) {
          // Previously guarded only by an assert, so with assertions disabled a short or
          // blank line crashed the whole load with an IndexOutOfBoundsException.
          log.warn(String.format("%s:%d: expected %d tab-separated fields but found %d; skipping line",
              filename, lineNumber, MIN_FIELDS, toks.length));
          continue;
        }

        String word = toks[WORD_FIELD].replace("|", "");
        String stem = STEM_SUFFIX.matcher(toks[STEM_FIELD]).replaceAll("");

        // These two stem values mark lines that carry no usable stem.
        if (stem.equals("NA") || stem.equals("O")) continue;

        // Run the stem through the lexical mapper (null is passed as its first argument),
        // then convert it with Buckwalter.buckwalterToUnicode before storing.
        stem = lexMapper.map(null, stem);
        String uniStem = b2a.buckwalterToUnicode(stem);

        if (!verbStems.containsKey(word)) {
          verbStems.put(word, uniStem);
        }
      }

      System.err.printf("%s: Loaded %d stems\n", this.getClass().getName(), verbStems.keySet().size());

    } catch (IOException e) {
      // Also covers FileNotFoundException and UnsupportedEncodingException, both of which
      // are IOException subclasses. Loading stays best-effort: report and return.
      log.err("Unable to load verb stems from " + filename + ": " + e);
      log.err(e);
    }
  }

  /**
   * Writes every stored entry to the given writer, one {@code word : stem} pair per line.
   * Intended for debugging.
   *
   * @param pw destination for the dump
   */
  public void debugPrint(PrintWriter pw) {
    for (Map.Entry<String,String> entry : verbStems.entrySet()) {
      pw.printf("%s : %s\n", entry.getKey(), entry.getValue());
    }
  }

  /**
   * Loads a stem file and dumps its contents to standard output.
   *
   * @param args optional; {@code args[0]} is the stem file to load. When absent, the file
   *             {@code e.test} in the working directory is used.
   */
  public static void main(String[] args) {
    String filename = (args != null && args.length > 0) ? args[0] : DEFAULT_TEST_FILE;

    ArabicVerbStemBank vsb = ArabicVerbStemBank.getInstance();
    vsb.load(filename);

    PrintWriter pw = new PrintWriter(System.out, true);
    vsb.debugPrint(pw);
  }
}