package joshua.discriminative.syntax_reorder;
/*Zhifei Li, <zhifei.work@gmail.com>
* Johns Hopkins University
*/
import java.io.BufferedWriter;
import java.io.OutputStreamWriter;
import java.util.Hashtable;
import java.util.Stack;
import java.util.Vector;
import joshua.discriminative.FileUtilityOld;
/**
 * A syntax tree built from a bracketed parse string, with traversals that print it,
 * derive target-side spans and complement spans from a word alignment, and hand every
 * node to {@link TreeNode} for frontier tagging and rule derivation.
 *
 * <p>The real tree hangs below an artificial node named {@code "fake_root"}; that node
 * is never printed as a bracket.
 */
public class Tree {
  /** Name given to the artificial node that sits above the parsed tree. */
  private static final String FAKE_ROOT_NAME = "fake_root";

  /** The artificial top node; the parsed nodes are its descendants. */
  public TreeNode root = null;

  /**
   * Parses a bracketed tree string such as
   * {@code ( (FRAG (NR w1) (NN w2) (PU () (PU ))))}.
   *
   * <p>Every {@code '('} opens a node whose tag is the text up to the next space (so the
   * bare outer bracket yields a node with an empty tag). If the text after that space is
   * another opening bracket, parsing continues with the child; otherwise the text up to
   * the next {@code ')'} is the terminal symbol and the node becomes a pre-terminal.
   * A terminal that is itself a bracket, as in {@code (PU ()} or {@code (PU ))}, is
   * recognised by looking one character further ahead.
   *
   * <p>The string is assumed to be well formed: a missing space or closing bracket makes
   * the {@code substring}/{@code charAt} calls throw, and surplus closing brackets empty
   * the stack.
   *
   * @param treeStr the bracketed tree, must not be null
   */
  public Tree(String treeStr) {
    Stack<TreeNode> stack = new Stack<TreeNode>();
    root = new TreeNode(FAKE_ROOT_NAME);
    stack.push(root);
    for (int i = 0; i < treeStr.length(); i++) {
      if (treeStr.charAt(i) == '(') { // start of a tag
        int nextSpacePos = treeStr.indexOf(' ', i + 1);
        String tag = treeStr.substring(i + 1, nextSpacePos);

        // Attach the new node to the innermost open node and make it the innermost one.
        TreeNode node = new TreeNode(tag);
        TreeNode parent = stack.peek();
        parent.children.add(node);
        node.parent = parent;
        stack.push(node);

        // "(" right after the space opens a child, unless it is the terminal of "(PU ()".
        boolean opensChild =
            treeStr.charAt(nextSpacePos + 1) == '('
                && treeStr.charAt(nextSpacePos + 2) != ')';
        if (opensChild) {
          i = nextSpacePos; // the loop's i++ moves on to the child's "("
        } else {
          // Search from +2 so that a ")" terminal, as in "(PU ))", is not taken as the close.
          int closingPos = treeStr.indexOf(')', nextSpacePos + 2);
          String terminalSymbol = treeStr.substring(nextSpacePos + 1, closingPos);
          i = closingPos;

          TreeNode preTerminal = stack.pop();
          preTerminal.setAsPreTerminal(terminalSymbol);

          // Only the closing of a pre-terminal can be followed by further closings:
          // each consecutive ")" closes one more enclosing node.
          i++;
          while (i < treeStr.length() && treeStr.charAt(i) == ')') {
            stack.pop();
            i++;
          }
          // i now sits on the separator (or past the end); the loop's i++ skips it.
        }
      }
    }
  }

  /**
   * Writes the terminal symbols in left-to-right order, each followed by a space, then a
   * newline, and flushes the writer.
   *
   * @param out destination; when null a writer on {@code System.out} is used
   */
  public void printTreeTerminals(BufferedWriter out) {
    if (out == null) {
      out = new BufferedWriter(new OutputStreamWriter(System.out));
    }
    printTreeTerminals(root, out);
    FileUtilityOld.writeLzf(out, "\n");
    FileUtilityOld.flushLzf(out);
  }

  /** Pre-order walk that writes the terminal symbol of every node that has one. */
  private void printTreeTerminals(TreeNode node, BufferedWriter out) {
    // Compare contents: the former "!= \"\"" compared references and only worked by accident.
    if (!"".equals(node.terminalSymbol)) {
      FileUtilityOld.writeLzf(out, node.terminalSymbol + " ");
    }
    for (int i = 0; i < node.children.size(); i++) {
      printTreeTerminals((TreeNode) node.children.get(i), out);
    }
  }

  /**
   * Writes the tree in bracketed form, using each node's {@code nameAfterReorder} as its
   * tag, followed by a newline, and flushes the writer.
   *
   * @param out destination; when null a writer on {@code System.out} is used
   */
  public void printTree(BufferedWriter out) {
    if (out == null) {
      out = new BufferedWriter(new OutputStreamWriter(System.out));
    }
    printTree(root, out);
    // One flush after the whole tree is enough; the output is the same as flushing per node.
    FileUtilityOld.flushLzf(out);
  }

  /** Writes {@code node} and its subtree; the artificial root contributes only the newline. */
  private void printTree(TreeNode node, BufferedWriter out) {
    boolean isFakeRoot = node.name.compareTo(FAKE_ROOT_NAME) == 0;
    if (!isFakeRoot) {
      FileUtilityOld.writeLzf(out, "(" + node.nameAfterReorder + " " + node.terminalSymbol);
    }
    int childCount = node.children.size();
    for (int i = 0; i < childCount; i++) {
      printTree((TreeNode) node.children.get(i), out);
      if (i != childCount - 1) {
        FileUtilityOld.writeLzf(out, " "); // separator between siblings
      }
    }
    if (!isFakeRoot) {
      FileUtilityOld.writeLzf(out, ")");
    } else {
      FileUtilityOld.writeLzf(out, "\n");
    }
  }

  /**
   * Prints name, terminal symbol and child count of every node below the artificial root
   * to {@code System.out}; the artificial root itself prints a header line instead.
   *
   * @param root node at which to start
   */
  public void printTreeStatistics(TreeNode root) {
    if (root.name.compareTo(FAKE_ROOT_NAME) != 0) {
      System.out.print(
          "Name: " + root.name + "; " + "Terminal: " + root.terminalSymbol + "; "
              + "Children: " + root.children.size() + "\n");
    } else {
      System.out.print("\n------Tree Staistics are\n");
    }
    for (int i = 0; i < root.children.size(); i++) {
      printTreeStatistics((TreeNode) root.children.get(i));
    }
  }

  //////////// for alignment, begin

  /** Builds a span, i.e. a two-element vector holding start and end position (inclusive). */
  private static Vector makeSpan(int start, int end) {
    Vector span = new Vector();
    span.add(Integer.valueOf(start));
    span.add(Integer.valueOf(end));
    return span;
  }

  /**
   * Widens {@code min_max} ({@code [0]} = minimum, {@code [1]} = maximum) so that it covers
   * every Integer in {@code t_v}.
   */
  private void updateMinMax(int[] min_max, Vector t_v) {
    for (int i = 0; i < t_v.size(); i++) {
      int value = ((Integer) t_v.get(i)).intValue();
      if (value < min_max[0]) {
        min_max[0] = value;
      }
      if (value > min_max[1]) {
        min_max[1] = value;
      }
    }
  }

  /**
   * Merges spans into maximal runs of covered positions.
   *
   * @param v_of_spans vector of spans, each a vector of two Integers (start, end inclusive)
   * @param total_len number of positions; every span must lie within {@code [0, total_len)}
   * @return vector of merged spans in ascending order, or null when {@code v_of_spans} is
   *     null or empty
   */
  private Vector unionOfSpans(Vector v_of_spans, int total_len) {
    if (v_of_spans == null || v_of_spans.size() <= 0) {
      return null;
    }
    // Mark every position covered by at least one span.
    boolean[] covered = new boolean[total_len];
    for (int i = 0; i < v_of_spans.size(); i++) {
      Vector span = (Vector) v_of_spans.get(i);
      int start = ((Integer) span.get(0)).intValue();
      int end = ((Integer) span.get(1)).intValue();
      for (int j = start; j <= end; j++) {
        covered[j] = true;
      }
    }
    // Read the marks back as maximal runs.
    Vector res = new Vector();
    for (int i = 0; i < total_len; i++) {
      if (covered[i]) {
        int runStart = i;
        while (i < total_len && covered[i]) {
          i++;
        }
        res.add(makeSpan(runStart, i - 1));
        // i is now on an uncovered position (or past the end); the loop's i++ skips it.
      }
    }
    return res;
  }

  /**
   * Computes the complement spans of every node. Spans must have been derived first.
   *
   * @param total_len length of the target string
   */
  public void derive_complement_spans(int total_len) {
    derive_complement_spans(root, total_len);
  }

  /**
   * Sets {@code complementSpans} on {@code node} and then on its descendants.
   *
   * <p>For the top node the complement is whatever lies left and right of its own span.
   * For any other node it is the union of its parent's complement spans and the spans of
   * its siblings.
   *
   * @param node node to process
   * @param total_len length of the target string
   */
  private void derive_complement_spans(TreeNode node, int total_len) {
    if (node.parent == null) { // top node
      // A top node without a span (nothing aligned) used to cause a NullPointerException;
      // its complement is left unset instead.
      if (node.span != null) {
        int spanStart = ((Integer) node.span.get(0)).intValue();
        int spanEnd = ((Integer) node.span.get(1)).intValue();
        Vector outside = new Vector();
        if (spanStart > 0) {
          outside.add(makeSpan(0, spanStart - 1));
        }
        if (spanEnd < total_len - 1) {
          // Starts one past the span's end; it previously started at the end itself and so
          // overlapped the span.
          outside.add(makeSpan(spanEnd + 1, total_len - 1));
        }
        if (outside.size() > 0) {
          node.complementSpans = outside;
        }
      }
    } else {
      Vector v_of_spans = new Vector();
      if (node.parent.complementSpans != null) {
        v_of_spans.addAll(node.parent.complementSpans); // my parent's complement spans
      }
      for (int i = 0; i < node.parent.children.size(); i++) {
        TreeNode sibling = (TreeNode) node.parent.children.get(i);
        if (sibling != node && sibling.span != null) { // exclude myself
          v_of_spans.add(sibling.span);
        }
      }
      node.complementSpans = unionOfSpans(v_of_spans, total_len);
    }
    // Children need this node's complement spans, so they are processed afterwards.
    for (int i = 0; i < node.children.size(); i++) {
      derive_complement_spans((TreeNode) node.children.get(i), total_len);
    }
  }

  /**
   * Derives the span of every node from a word alignment.
   *
   * @param align_tbl maps the Integer position of a terminal (counted left to right from 0)
   *     to a Vector of the Integer positions it is aligned to
   */
  public void derive_span(Hashtable align_tbl) {
    int[] terminal_pos = new int[1];
    derive_span(root, align_tbl, terminal_pos);
  }

  /**
   * Post-order walk that sets {@code span} to (min, max) of the aligned positions below
   * {@code node}; {@code span} is left untouched when nothing below the node is aligned.
   *
   * @param node node to process
   * @param align_tbl alignment table, see {@link #derive_span(Hashtable)}
   * @param terminal_pos one-element counter holding the index of the next terminal
   */
  private void derive_span(TreeNode node, Hashtable align_tbl, int[] terminal_pos) {
    // Children first: an inner node's span is assembled from its children's spans.
    for (int i = 0; i < node.children.size(); i++) {
      derive_span((TreeNode) node.children.get(i), align_tbl, terminal_pos);
    }

    // Sentinels meaning "nothing seen yet".
    int[] min_max = new int[2];
    min_max[0] = Integer.MAX_VALUE; // min
    min_max[1] = -1; // max

    // Compare contents: the former "!= \"\"" compared references and only worked by accident.
    if (!"".equals(node.terminalSymbol)) { // pre-terminal
      // A Hashtable cannot hold null values, so a null result means "no link".
      Vector links = (Vector) align_tbl.get(Integer.valueOf(terminal_pos[0]));
      if (links != null) {
        updateMinMax(min_max, links);
      }
      terminal_pos[0]++;
    } else {
      for (int i = 0; i < node.children.size(); i++) {
        Vector span_child = ((TreeNode) node.children.get(i)).span;
        if (span_child != null) {
          updateMinMax(min_max, span_child);
        }
      }
    }

    if (min_max[0] != Integer.MAX_VALUE && min_max[1] != -1) {
      node.span = makeSpan(min_max[0], min_max[1]);
    }
  }

  /** Calls {@code setFrontierFlag()} on every node, parents before children. */
  public void tag_frontier_node() {
    tag_frontier_node(root);
  }

  /** Pre-order walk behind {@link #tag_frontier_node()}. */
  private void tag_frontier_node(TreeNode node) {
    node.setFrontierFlag();
    for (int i = 0; i < node.children.size(); i++) {
      tag_frontier_node((TreeNode) node.children.get(i));
    }
  }

  /**
   * Calls {@code deriveRule} on every node, parents before children, passing the
   * arguments through unchanged.
   *
   * @param rule_tbl table handed to {@code TreeNode.deriveRule}
   * @param len_tgt length of the target string
   * @param out writer handed to {@code TreeNode.deriveRule}
   */
  public void extract_rule(Hashtable rule_tbl, int len_tgt, BufferedWriter out) {
    extract_rule(root, rule_tbl, len_tgt, out);
  }

  /** Pre-order walk behind {@link #extract_rule(Hashtable, int, BufferedWriter)}. */
  private void extract_rule(TreeNode node, Hashtable rule_tbl, int len_tgt, BufferedWriter out) {
    node.deriveRule(rule_tbl, len_tgt, out);
    for (int i = 0; i < node.children.size(); i++) {
      extract_rule((TreeNode) node.children.get(i), rule_tbl, len_tgt, out);
    }
  }
}