package edu.fudan.nlp.parser.dep;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.TreeMap;
import java.util.TreeSet;
import edu.fudan.nlp.parser.dep.DependencyTree;
/**
 * One sentence in the FudanNLP standard column-based data format.
 *
 * <p>Every input line describes one token. Columns are separated by whitespace and are,
 * in order: an optional sequence index, the word, the tag, the head index and the relation
 * label. The word and tag columns may hold several alternatives separated by {@code |},
 * or the wildcard {@code *}.
 *
 * <p>An instance created with the no-argument constructor holds no data until
 * {@link #parse(List, int, boolean)} is called; the query methods then report an empty
 * sentence instead of failing.
 *
 * @since FudanNLP 1.5
 */
public class TreeCacheSent {

    /** Column separator (runs of whitespace); compiled once instead of once per line. */
    private static final java.util.regex.Pattern COLUMN_SEPARATOR =
            java.util.regex.Pattern.compile("[\\t\\s]+");

    /** Word column of each token, exactly as read. */
    public String[] owords;
    /** Sorted alternatives of each word column; {@code null} where the column is {@code *}. */
    public String[][] words;
    /** Tag column of each token, exactly as read; {@code null} where the line has no tag column. */
    public String[] otags;
    /** Sorted alternatives of each tag column; {@code null} where it is {@code *} or missing. */
    public String[][] tags;
    /** Head index of each token after subtracting the index origin; 0 where the column is missing. */
    public int[] heads;
    /** Relation column of each token; {@code null} where the line has no relation column. */
    public String[] relations;

    /**
     * Index origin, 0 by default.
     */
    private int start = 0;

    /**
     * Parses a sentence whose lines start with a sequence index, using the default index origin.
     *
     * @param list the lines of the sentence, one token per line
     */
    public TreeCacheSent(List<String> list) {
        parse(list, start, true);
    }

    /**
     * Creates an empty, unparsed instance; call {@link #parse(List, int, boolean)} to fill it.
     */
    public TreeCacheSent() {
    }

    /**
     * Parses the lines of one sentence into the public arrays, replacing any previous content.
     *
     * @param list one sentence, one whitespace-separated line per token,
     *             e.g. {"上海 NR 2 NMOD", "浦东 NR 6 NMOD"}
     * @param pos  index origin of the data: the expected sequence index of the first line,
     *             and the value subtracted from every head column
     * @param b    whether the first column of each line is a sequence index
     * @throws ArrayIndexOutOfBoundsException if a line has no word column
     * @throws NumberFormatException if a head column is not an integer
     */
    public void parse(List<String> list, int pos, boolean b) {
        int len = list.size();
        owords = new String[len];
        otags = new String[len];
        words = new String[len][];
        tags = new String[len][];
        heads = new int[len];
        relations = new String[len];

        // Column that holds the word: shifted by one when a sequence index comes first.
        final int wordCol = b ? 1 : 0;

        for (int j = 0; j < len; j++) {
            // Trim first: leading whitespace would otherwise produce an empty first
            // token and shift every field one column to the right.
            String[] toks = COLUMN_SEPARATOR.split(list.get(j).trim());
            if (b) {
                assert (j + pos) == Integer.parseInt(toks[0])
                        : "unexpected sequence index on line " + j;
            }

            owords[j] = toks[wordCol];
            words[j] = splitAlternatives(owords[j]);

            if (toks.length > wordCol + 1) {
                otags[j] = toks[wordCol + 1];
                tags[j] = splitAlternatives(otags[j]);
            }
            if (toks.length > wordCol + 2) {
                heads[j] = Integer.parseInt(toks[wordCol + 2]) - pos;
            }
            if (toks.length > wordCol + 3) {
                relations[j] = toks[wordCol + 3];
            }
            if (toks.length > wordCol + 4) {
                // Message reads "too many columns"; extra columns are otherwise ignored.
                System.err.println("格式列表太多!");
            }
        }
    }

    /**
     * Splits a column into its sorted {@code |}-separated alternatives.
     *
     * @param column raw column text
     * @return the sorted alternatives, or {@code null} if the column is the wildcard {@code *}
     */
    private static String[] splitAlternatives(String column) {
        if (column.equals("*")) {
            return null;
        }
        String[] alternatives = column.split("\\|");
        Arrays.sort(alternatives);
        return alternatives;
    }

    /**
     * Renders the sentence as tab-separated lines: position, word, tag, head and relation.
     * Tokens whose word column is the wildcard {@code *} are skipped.
     *
     * @return the rendered sentence, or an empty string if nothing has been parsed
     */
    @Override
    public String toString() {
        if (words == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (int j = 0; j < words.length; j++) {
            if (words[j] == null) {
                continue;
            }
            sb.append(j);
            sb.append("\t");
            sb.append(owords[j]);
            if (tags != null) {
                sb.append("\t");
                sb.append(otags[j]);
            }
            if (heads != null) {
                sb.append("\t");
                sb.append(heads[j]);
            }
            if (relations != null) {
                sb.append("\t");
                sb.append(relations[j]);
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * @return the number of tokens, or 0 if nothing has been parsed
     */
    public int size() {
        return words == null ? 0 : words.length;
    }

    /**
     * Reports whether the first token carries tag alternatives.
     *
     * @return {@code true} if the first token's tag was read and is not the wildcard;
     *         {@code false} otherwise, including for an empty or unparsed sentence
     */
    public boolean hasTag() {
        return tags != null && tags.length > 0 && tags[0] != null;
    }

    /**
     * Builds a tree with one node per token and links every token to its head.
     *
     * <p>A token whose head index is -1 is taken as the root; if several tokens qualify,
     * the last one is returned. Any other token is attached to its head node with
     * {@code addLeftChild} when the head index is greater than the token's own index,
     * and with {@code addRightChild} otherwise.
     *
     * @return the root node, or {@code null} if no token has head index -1
     *         or nothing has been parsed
     * @throws IndexOutOfBoundsException if a head index other than -1 lies outside the sentence
     */
    public DependencyTree toTree() {
        if (words == null) {
            return null;
        }
        int n = words.length;
        List<DependencyTree> nodes = new ArrayList<DependencyTree>(n);
        for (int j = 0; j < n; j++) {
            nodes.add(new DependencyTree(j, owords[j], otags[j], relations[j]));
        }

        DependencyTree root = null;
        for (int j = 0; j < n; j++) {
            int head = heads[j];
            if (head == -1) {
                root = nodes.get(j);
            } else if (head > j) {
                nodes.get(head).addLeftChild(nodes.get(j));
            } else {
                nodes.get(head).addRightChild(nodes.get(j));
            }
        }
        return root;
    }
}