package edu.stanford.nlp.wordseg;
import java.util.*;
import java.io.*;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.logging.Redwood;
/**
* Check tag of each character from 5 different corpora. (4 official training corpora of Sighan bakeoff 2005, plus CTB)
* These tags are not external knowledge. They are learned from the training corpora.
* @author Huihsin Tseng
* @author Pichuan Chang
*/
public class CorpusChar {
private static final Redwood.RedwoodChannels logger = Redwood.channels(CorpusChar.class);
private Map <String, Set <String>> charMap;
public CorpusChar(String charlistFilename) {
charMap=readDict(charlistFilename);
}
private Map<String, Set<String>> getCharMap() {
return charMap;
}
private static Map<String, Set<String>> readDict(String filename) {
Map<String, Set<String>> char_dict;
try {
BufferedReader detectorReader = IOUtils.readerFromString(filename, "UTF-8");
char_dict = Generics.newHashMap();
//logger.debug("DEBUG: in CorpusChar readDict");
for (String detectorLine; (detectorLine = detectorReader.readLine()) != null; ) {
String[] fields = detectorLine.split(" ");
String tag=fields[0];
Set<String> chars= char_dict.get(tag);
if(chars==null){
chars = Generics.newHashSet();
char_dict.put(tag,chars);
}
//logger.debug("DEBUG: CorpusChar: "+filename+" "+fields[1]);
chars.add(fields[1]);
}
detectorReader.close();
} catch (IOException e) {
throw new RuntimeIOException(e);
}
logger.info("Loading character dictionary file from " + filename + " [done].");
return char_dict;
}
public String getTag(String a1, String a2) {
Map<String, Set<String>> h1=getCharMap();
Set<String> h2=h1.get(a1);
if (h2 == null) return "0";
if (h2.contains(a2))
return "1";
return "0";
}
/*
public String getCtbTag(String a1, String a2) {
HashMap h1=dict.getctb();
Set h2=(Set)h1.get(a1);
if (h2 == null) return "0";
if (h2.contains(a2))
return "1";
return "0";
}
public String getAsbcTag(String a1, String a2) {
HashMap h1=dict.getasbc();
Set h2=(Set)h1.get(a1);
if (h2 == null) return "0";
if (h2.contains(a2))
return "1";
return "0";
}
public String getPkuTag(String a1, String a2) {
HashMap h1=dict.getpku();
Set h2=(Set)h1.get(a1);
if (h2 == null) return "0";
if (h2.contains(a2))
return "1";
return "0";
}
public String getHkTag(String a1, String a2) {
HashMap h1=dict.gethk();
Set h2=(Set)h1.get(a1);
if (h2 == null) return "0";
if (h2.contains(a2))
return "1";
return "0";
}
public String getMsrTag(String a1, String a2) {
HashMap h1=dict.getmsr();
Set h2=(Set)h1.get(a1);
if (h2 == null) return "0";
if (h2.contains(a2))
return "1";
return "0";
}*/
}//end of class