package edu.stanford.nlp.wordseg;
import java.util.*;
import java.io.*;
import org.apache.hadoop.fs.FSDataInputStream;
/**
 * Affixation information: a set of dictionary entries loaded from a text
 * source that is decoded as UTF-8, one entry per line.
 *
 * @author Huihsin Tseng
 * @author Pichuan Chang
 */
@SuppressWarnings("unused")
public class affDict {
  // Neither of the two private fields below is read or assigned anywhere else
  // in this class; they are retained as-is.
  private String sighanCorporaDict = "/u/nlp/data/chinese-segmenter/";
  private String affixFilename;

  //public Set ctbIns, asbcIns, hkIns, pkIns, msrIns;
  /** The dictionary entries; each element is one complete line of the input. */
  public Set<String> ins;

  /**
   * Loads the dictionary from a file on the local file system.
   * If the file cannot be opened or read, a message is printed to standard
   * error and the JVM is terminated with exit status -1.
   *
   * @param affixFilename path of the dictionary file, read as UTF-8
   */
  public affDict(String affixFilename) {
    ins = readDict(affixFilename);
  }

  /**
   * Loads the dictionary from an already opened stream. The stream is read
   * until end of input and is NOT closed here; that is left to the caller.
   * If reading fails, a message is printed to standard error and the JVM is
   * terminated with exit status -1.
   *
   * @author ferhanture
   * @param stream source of the dictionary bytes, decoded as UTF-8
   **/
  public affDict(FSDataInputStream stream) {
    ins = readDict(stream);
  }

  /**
   * @return the set of loaded entries (the same object as {@link #ins}, not a copy)
   */
  Set<String> getInDict() {
    return ins;
  }

  /**
   * Reads every line of the named file into a new set.
   *
   * @param filename path of the dictionary file
   * @return the lines of the file, with duplicates collapsed by the set
   */
  private Set<String> readDict(String filename) {
    Set<String> entries = new HashSet<String>();
    InputStream in = null;
    try {
      in = new FileInputStream(filename);
      // Commented-out code in earlier revisions chose Big5_HKSCS or GB18030
      // based on the file name; the input is now always decoded as UTF-8.
      readLines(new BufferedReader(new InputStreamReader(in, "UTF-8")), entries);
    } catch (FileNotFoundException e) {
      System.err.println("affDict: File not found");
      System.err.println("filename: " + filename);
      System.exit(-1);
    } catch (IOException e) {
      // Report the cause instead of exiting silently.
      System.err.println("affDict: error reading " + filename + ": " + e);
      System.exit(-1);
    } finally {
      // Release the file handle once the dictionary has been loaded.
      closeQuietly(in);
    }
    return entries;
  }

  /**
   * Reads every line of the given stream into a new set.
   *
   * The bytes are decoded through an {@link InputStreamReader} rather than
   * with the stream's inherited {@code readLine()}, which is deprecated
   * because it widens each byte to a char and therefore corrupts any
   * multi-byte UTF-8 entry.
   *
   * @author ferhanture
   * @param stream source of the dictionary bytes; not closed by this method
   * @return the lines of the stream, with duplicates collapsed by the set
   */
  private Set<String> readDict(FSDataInputStream stream) {
    Set<String> entries = new HashSet<String>();
    try {
      readLines(new BufferedReader(new InputStreamReader(stream, "UTF-8")), entries);
    } catch (FileNotFoundException e) {
      System.err.println("affDict: Stream not found");
      System.exit(-1);
    } catch (IOException e) {
      // Report the cause instead of exiting silently.
      System.err.println("affDict: error reading stream: " + e);
      System.exit(-1);
    }
    return entries;
  }

  /** Adds each line produced by {@code reader} to {@code sink} until end of input. */
  private static void readLines(BufferedReader reader, Set<String> sink) throws IOException {
    String line;
    while ((line = reader.readLine()) != null) {
      sink.add(line);
    }
  }

  /** Closes {@code c} if it is non-null, ignoring any failure raised by the close itself. */
  private static void closeQuietly(Closeable c) {
    if (c == null) {
      return;
    }
    try {
      c.close();
    } catch (IOException ignored) {
      // All data has already been read (or the read has already failed and
      // been reported); a failure while closing carries no further information.
    }
  }

  /**
   * Tests whether a string is one of the loaded entries.
   *
   * @param a1 the string to look up
   * @return {@code "1"} if {@code a1} is in the dictionary, {@code "0"} otherwise
   */
  public String getInDict(String a1) {
    if (getInDict().contains(a1)) {
      return "1";
    }
    return "0";
  }
}//end of class