package edu.fudan.ontology.graph;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import com.sun.org.apache.xalan.internal.xsltc.compiler.sym;
import com.sun.swing.internal.plaf.synth.resources.synth;
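/**
* Word dictionary: stores words as graph nodes and the relations between
* them (such as synonym and antonym) as labelled edges.
* @author Xipeng
*
*/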
public class WordGraph {
    /** All known words; a word's position in this list is its integer id. */
    private final ArrayList<Word> words;
    /** Synonym groups; each set holds the ids of words that were added together with the SYM relation. */
    private final ArrayList<HashSet<Integer>> symIndex;
    /** Maps a word's string form to its id (its position in {@code words}). */
    private final HashMap<String,Integer> index;
    /** Relation stored for each ordered (first id, second id) pair. */
    private final SparseMatrix<WordRelationEnum> edges;

    /**
     * Creates an empty graph with no words, synonym groups or edges.
     */
    public WordGraph() {
        words = new ArrayList<Word>();
        index = new HashMap<String, Integer>();
        symIndex = new ArrayList<HashSet<Integer>>();
        // NOTE(review): 100 is presumably an initial capacity/dimension of the matrix - confirm against SparseMatrix.
        edges = new SparseMatrix<WordRelationEnum>(100);
    }

    /**
     * Loads relations from a UTF-8 text file.
     * Each non-blank line is split on whitespace: the first token is the relation
     * name, the remaining tokens are the words linked by that relation. Lines with
     * fewer than two tokens are skipped.
     *
     * @param path path of the file to read
     * @throws IOException if the file cannot be opened or read
     */
    public void read(String path) throws IOException{
        BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8"));
        try {
            String line;
            while ((line = bfr.readLine()) != null) {
                // Trim first: a leading blank would otherwise produce an empty first
                // token, and a whitespace-only line would split into an empty array.
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                String[] toks = line.split("\\s+");
                // Check the token count before touching toks[0].
                if (toks.length < 2) {
                    continue;
                }
                // NOTE(review): what getWithName returns for an unknown name is not visible
                // here; a null relation would fail inside addRel - confirm.
                WordRelationEnum rel = WordRelationEnum.getWithName(toks[0]);
                addRel(rel, Arrays.copyOfRange(toks, 1, toks.length));
            }
        } finally {
            // Close the reader even when reading or addRel throws.
            bfr.close();
        }
    }

    /**
     * Registers the given words and connects every pair of them with {@code rel}.
     * For the SYM relation the words are additionally recorded in a synonym group:
     * the first existing group that already contains one of the words is reused,
     * otherwise a new group is created.
     *
     * @param rel  relation to store on the edges
     * @param toks words taking part in the relation
     */
    public void addRel(WordRelationEnum rel, String[] toks){
        int[] ids = new int[toks.length];
        for (int i = 0; i < toks.length; i++) {
            ids[i] = add(new Word(toks[i]));
        }

        if (rel == WordRelationEnum.SYM) {
            // NOTE(review): only the first matching group is reused; if the words span
            // several existing groups those groups are not merged.
            HashSet<Integer> group = null;
            for (int i = 0; i < ids.length && group == null; i++) {
                int g = containSym(ids[i]);
                if (g != -1) {
                    group = symIndex.get(g);
                }
            }
            if (group == null) {
                group = new HashSet<Integer>();
                symIndex.add(group);
            }
            for (int i = 0; i < ids.length; i++) {
                group.add(ids[i]);
            }
        }

        // Link each earlier word to each later word; add the reverse edge as well
        // when the relation reports Direction.BOTH.
        for (int i = 0; i < ids.length; i++) {
            for (int j = i + 1; j < ids.length; j++) {
                edges.set(ids[i], ids[j], rel);
                if (rel.getDirection() == Direction.BOTH) {
                    edges.set(ids[j], ids[i], rel);
                }
            }
        }
    }

    /**
     * Finds the synonym group that contains a word id.
     *
     * @param i word id to look for
     * @return index of the first group in {@code symIndex} containing {@code i}, or -1 if none does
     */
    private int containSym(int i) {
        for (int j = 0; j < symIndex.size(); j++) {
            if (symIndex.get(j).contains(i)) {
                return j;
            }
        }
        return -1;
    }

    /**
     * Returns the id of a word, registering it first if it is not yet known.
     *
     * @param w word to look up or insert
     * @return id of the word (its position in {@code words})
     */
    private int add(Word w) {
        Integer id = index.get(w.word);
        if (id == null) {
            id = words.size();
            words.add(w);
            index.put(w.word, id);
        }
        return id;
    }

    /**
     * Small manual demo: loads a graph file and prints a few relation queries.
     *
     * @param args unused
     * @throws IOException if the graph file cannot be read
     */
    public static void main(String[] args) throws IOException {
        WordGraph wg = new WordGraph();
        wg.read("./models/wordgraph.txt");
        boolean b = wg.isSym("去","到");
        System.out.println(b);
        b = wg.isSym("我","到");
        System.out.println(b);
        b = wg.isAntonym("唱","听");
        System.out.println(b);
    }

    /**
     * Tells whether the edge from {@code w1} to {@code w2} is an ANTONYM edge.
     *
     * @param w1 first word
     * @param w2 second word
     * @return {@code true} only if both words are known, differ, and the stored
     *         relation for (w1, w2) is ANTONYM
     */
    public boolean isAntonym(String w1, String w2) {
        if (w1.equals(w2)) {
            return false;
        }
        return hasRelation(w1, w2, WordRelationEnum.ANTONYM);
    }

    /**
     * Adds the given words as mutual antonyms.
     *
     * @param s words to connect with the ANTONYM relation
     */
    public void addAntonym(String[] s) {
        addRel(WordRelationEnum.ANTONYM,s);
    }

    /**
     * Tells whether two words are synonyms. Identical strings always count as
     * synonyms; otherwise the stored relation for (w1, w2) must be SYM.
     *
     * @param w1 first word
     * @param w2 second word
     * @return {@code true} if the strings are equal or a SYM edge from w1 to w2 exists
     */
    public boolean isSym(String w1, String w2) {
        if (w1.equals(w2)) {
            return true;
        }
        return hasRelation(w1, w2, WordRelationEnum.SYM);
    }

    /**
     * Adds the given words as mutual synonyms.
     *
     * @param s words to connect with the SYM relation
     */
    public void addSym(String[] s) {
        addRel(WordRelationEnum.SYM,s);
    }

    /** Checks whether both words are known and the edge (w1, w2) carries {@code expected}. */
    private boolean hasRelation(String w1, String w2, WordRelationEnum expected) {
        Integer id1 = index.get(w1);
        Integer id2 = index.get(w2);
        if (id1 == null || id2 == null) {
            return false;
        }
        return edges.get(id1, id2) == expected;
    }

    /**
     * Lists every stored edge, one per line, as {@code RELATION: word1 word2}.
     *
     * @return textual dump of all edges
     */
    @Override
    public String toString(){
        StringBuilder sb = new StringBuilder();
        long[] keys = edges.getKeyIdx();
        for (long key : keys) {
            int[] indices = edges.getIndices(key);
            String w1 = words.get(indices[0]).word;
            String w2 = words.get(indices[1]).word;
            WordRelationEnum rel = edges.get(indices[0], indices[1]);
            sb.append(rel.name());
            sb.append(": ");
            sb.append(w1);
            sb.append(" ");
            sb.append(w2);
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Maps a word to the label of its synonym group.
     *
     * @param w word to look up
     * @return {@code "SYM" + groupIndex} if the word belongs to a synonym group,
     *         otherwise the word itself (also when the word is unknown)
     */
    public String getSymID(String w) {
        Integer id = index.get(w);
        if (id == null) {
            return w;
        }
        int idx = containSym(id);
        if (idx != -1) {
            return "SYM"+idx;
        }
        return w;
    }

    /**
     * Lists the labels of all synonym groups.
     *
     * @return one single-element array {@code {"SYM" + i}} per synonym group, in group order
     */
    public ArrayList<String[]> getSymID() {
        ArrayList<String[]> al = new ArrayList<String[]>(symIndex.size());
        for (int i = 0; i < symIndex.size(); i++) {
            al.add(new String[]{"SYM"+i});
        }
        return al;
    }
}