// TreeNode.java example
//
// Explorer
// relax-decode-master
// - third-party
/**
 * 
 */
package joshua.discriminative.syntax_reorder;

import java.io.BufferedWriter;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import joshua.discriminative.FileUtilityOld;


/**
 * A node of a syntax tree.
 *
 * <p>The class offers two groups of operations:
 * <ul>
 *   <li>pattern matching of a rule tree against this tree ({@link #isSubsume});</li>
 *   <li>rule extraction driven by the alignment fields {@code span},
 *       {@code complementSpans} and {@code isFrontier}
 *       ({@link #setFrontierFlag} and {@link #deriveRule}).</li>
 * </ul>
 *
 * <p>All fields are mutable and directly accessible; the class performs no
 * synchronization.
 */
public class TreeNode {
	
	/* Result codes returned by patternMatch(). */
	private static final int MATCH_FAIL = 0;//the labels do not match
	private static final int MATCH_AND = 1;//labels match; every child must match position by position
	private static final int MATCH_OR = 2;//labels match; it is enough that one child matches
	
	/** Label of this node. */
	public String name="";
	/** Label after reordering; the one-argument constructor initialises it to {@link #name}. */
	public String nameAfterReorder="";
	
	/** Children in left-to-right order. */
	public List<TreeNode> children = new ArrayList<TreeNode>();
	/** Parent node, or {@code null} when none has been set. */
	public TreeNode parent=null;
	
	/** Terminal word below this node when it is a pre-terminal; empty otherwise. */
	public String terminalSymbol="";
	
	//with alignment information
	int terminalID=-1; //if this is a pre-terminal, then remember the terminal id (start from 0)
	//NOTE: the two vectors stay raw because code outside this class fills them;
	//this class reads span as two Integers (start, end) and complementSpans as a Vector of such pairs
	Vector span = null;//according to ISI's syntaxMT, this only consider the first/last word index in the english string, e.g., 1-3 or 10-10
	Vector complementSpans = null;//according to ISI's syntaxMT, it can be : 1-3,6-8,10-10
	boolean isFrontier=false;
	
	/** Creates a node with empty labels and no children. */
	public TreeNode(){
	
	}
	
	/**
	 * Creates a node whose label and reordered label are both {@code n}.
	 *
	 * @param n the node label
	 */
	public TreeNode(String n){
		this.name = n;
		this.nameAfterReorder = n;
	}
	
	/**
	 * Records the terminal word below this node. A non-empty symbol makes
	 * rule extraction treat this node as a pre-terminal.
	 *
	 * @param symbol the terminal word
	 */
	public void setAsPreTerminal(String symbol){
		this.terminalSymbol = symbol;
	}
	
	/**
	 * Overwrites the labels, children, parent and terminal symbol of this node
	 * with those of {@code to}. The children list is shared with {@code to},
	 * not copied, and the alignment fields (terminalID, span, complementSpans,
	 * isFrontier) are left untouched.
	 *
	 * @param to the node whose content is taken over
	 */
	public void replaceContentWith(TreeNode to){
		this.name=to.name;
		this.nameAfterReorder=to.nameAfterReorder;
		this.children=to.children;
		this.parent=to.parent;
		this.terminalSymbol=to.terminalSymbol;
	}
	
	/**
	 * Checks whether the rule tree rooted at {@code from} is subsumed by the
	 * tree rooted at this node.
	 *
	 * <p>A label in {@code from} may carry a variable tag such as
	 * {@code x0:NP}; the tag is stripped before the labels are compared and the
	 * matched node is recorded in {@code tag_tbl} under the tag ({@code x0}).
	 * Entries added while exploring a branch that later fails are deliberately
	 * not removed.
	 *
	 * @param from    root of the rule tree to match against this node
	 * @param tag_tbl receives the mapping from variable tag to matched node
	 * @return {@code true} if {@code from} matches this subtree
	 */
	public boolean isSubsume(TreeNode from, Map<String, TreeNode> tag_tbl){
		String ruleLabel = from.name;
		if(ruleLabel.matches("x\\d+\\:.+")){
			//the key is the whole tag up to the first ':', so a multi-digit tag
			//such as "x10" is not truncated to "x1"
			tag_tbl.put(ruleLabel.substring(0, ruleLabel.indexOf(':')), this);
		}
		ruleLabel = ruleLabel.replaceAll("x\\d+\\:", "");//e.g., skip "x0:" in x0:NP
		
		int matchKind = patternMatch(ruleLabel, this.name);
		if(matchKind==MATCH_AND){
			return matchesAllChildren(from, tag_tbl);
		}
		if(matchKind==MATCH_OR){
			return matchesAnyChild(from, tag_tbl);
		}
		return false;
	}
	
	/* "AND" match: a rule node without children matches anything below this node;
	 * otherwise the child counts must agree and children must match pairwise. */
	private boolean matchesAllChildren(TreeNode from, Map<String, TreeNode> tag_tbl){
		if(from.children.isEmpty()){
			return true;
		}
		if(from.children.size()!=this.children.size()){
			return false;
		}
		for(int i=0; i<from.children.size(); i++){
			if(!this.children.get(i).isSubsume(from.children.get(i), tag_tbl)){
				return false;
			}
		}
		return true;
	}
	
	/* "OR" match: the rule node must have exactly one child, and at least one
	 * child of this node must match it. */
	private boolean matchesAnyChild(TreeNode from, Map<String, TreeNode> tag_tbl){
		if(from.children.size()!=1){
			return false;
		}
		TreeNode ruleChild = from.children.get(0);
		for(TreeNode child : this.children){
			//tag_tbl is intentionally not cleared when one candidate fails
			if(child.isSubsume(ruleChild, tag_tbl)){
				return true;
			}
		}
		return false;
	}
	
	
	//########################## with alignment information
	/**
	 * Recomputes {@code isFrontier} from {@code span} and
	 * {@code complementSpans}.
	 *
	 * <p>The node is not a frontier when it has no span. It is a frontier when
	 * it has a span and no complement spans, or when its span overlaps none of
	 * the complement spans.
	 *
	 * @return the new value of {@code isFrontier}
	 */
	public boolean setFrontierFlag(){
		if(span==null){//unaligned source node
			isFrontier = false;
		}else if(complementSpans==null || complementSpans.isEmpty()){//span over all the english words
			isFrontier = true;
		}else{
			isFrontier = !overlapsComplementSpan();
		}
		return isFrontier;
	}
	
	/* True if [span start, span end] overlaps at least one complement span. */
	private boolean overlapsComplementSpan(){
		int spanStart = ((Integer)span.get(0)).intValue();
		int spanEnd = ((Integer)span.get(1)).intValue();
		for(Object entry : complementSpans){
			Vector comp = (Vector) entry;
			int compStart = ((Integer)comp.get(0)).intValue();
			int compEnd = ((Integer)comp.get(1)).intValue();
			boolean startInside = spanStart>=compStart && spanStart<=compEnd;
			boolean endInside = spanEnd>=compStart && spanEnd<=compEnd;
			boolean covers = spanStart<=compStart && spanEnd>=compEnd;//subsume
			if(startInside || endInside || covers){
				return true;
			}
		}
		return false;
	}
	
	
	/**
	 * Extracts one rule rooted at this node and emits it. Nothing happens
	 * unless this node is a frontier node with more than one child.
	 *
	 * <p>Output with a writer:
	 * {@code NP ||| (x0:DNP (LCP fake) (DEG fake)) (x1:NP fake) ||| x1 x0 ||| 0 0 0 0 0}
	 * is the kind of rule targeted; this method writes
	 * {@code lhs ||| rhs ||| 1}. Without a writer it prints
	 * {@code lhs => rhs} to standard output.
	 *
	 * @param rule_tbl not used by this method
	 * @param len_tgt  number of target-side positions; span start indices must be smaller
	 * @param out      destination writer, or {@code null} to print to standard output
	 */
	public void deriveRule(Hashtable rule_tbl, int len_tgt, BufferedWriter out){
		if(!isFrontier || children.size()<=1){//only extract rule for frontier node with more than one children
			return;
		}
		int[] x_id = {0};//next free variable index, shared through the recursion
		String[] alignedBySpanStart = new String[len_tgt];//the index is the start pos in the target, value is the rhs symbol
		String[] unalignedBySlot = new String[len_tgt+1];//the index remember how many spans should put before it
		StringBuilder lhs = new StringBuilder();
		
		//get the lhs symbols (this also fills the two rhs arrays)
		ctrlDeriveSubrule(x_id, lhs, alignedBySpanStart, unalignedBySlot);
		
		//now assemble the rhs symbols in target order
		StringBuilder rhs = new StringBuilder();
		int consumedSpans = 0;
		for(int startPos=0; startPos<alignedBySpanStart.length; startPos++){
			if(alignedBySpanStart[startPos]==null){
				continue;
			}
			//before an aligned symbol, emit the unaligned ones that are due
			flushUnaligned(unalignedBySlot, consumedSpans, rhs);
			rhs.append(alignedBySpanStart[startPos]).append(' ');
			consumedSpans++;
		}
		//emit all the remaining unaligned words
		flushUnaligned(unalignedBySlot, unalignedBySlot.length-1, rhs);
		
		String lhsText = lhs.toString().trim();
		String rhsText = rhs.toString().trim();
		if(out==null){
			System.out.print(lhsText+" => " + rhsText +"\n");
		}else{
			FileUtilityOld.writeLzf(out,lhsText+" ||| " + rhsText +" ||| 1\n");
		}
	}
	
	/* Appends and clears every pending unaligned entry in slots 0..lastSlot (inclusive). */
	private static void flushUnaligned(String[] unalignedBySlot, int lastSlot, StringBuilder rhs){
		int limit = Math.min(lastSlot, unalignedBySlot.length-1);
		for(int slot=0; slot<=limit; slot++){
			if(unalignedBySlot[slot]!=null){
				rhs.append(unalignedBySlot[slot]).append(' ');
				unalignedBySlot[slot]=null;
			}
		}
	}
	
	/* return value
	 * MATCH_FAIL (0): "AND"/"OR" name-match stage failed
	 * MATCH_AND  (1): "AND" name-match stage successful, need to do full-children match
	 * MATCH_OR   (2): "OR" name-match stage successful; for the children match it is enough
	 *                 that one child matches the pattern, the number of children is ignored
	 *
	 * Label syntax: "|X" requests an OR match on label X, "*" matches any label,
	 * "!X" matches any label other than X.
	 * */
	private int patternMatch(String from, String to){
		if(from.matches("\\|.+")){
			String wanted = from.replaceFirst("\\|", "");
			return to.equals(wanted) ? MATCH_OR : MATCH_FAIL;
		}
		if(from.equals("*") || to.equals("*")){//anything will match
			return MATCH_AND;
		}
		if(from.matches("\\!.+")){
			String excluded = from.replaceFirst("\\!", "");
			return to.equals(excluded) ? MATCH_FAIL : MATCH_AND;
		}
		if(to.matches("\\!.+")){
			String excluded = to.replaceFirst("\\!", "");
			return from.equals(excluded) ? MATCH_FAIL : MATCH_AND;
		}
		return from.equals(to) ? MATCH_AND : MATCH_FAIL;
	}
	
	/* Appends "(name child child ...)" to lhs, delegating every child to deriveSubrule(). */
	private void ctrlDeriveSubrule(int[] x_id, StringBuilder lhs, String[] v_rhs_frags, String[] v_rhs_unaligned){
		lhs.append('(').append(name).append(' ');
		int lastIndex = children.size()-1;
		for(int i=0; i<=lastIndex; i++){
			children.get(i).deriveSubrule(x_id, lhs, v_rhs_frags, v_rhs_unaligned);
			if(i<lastIndex){
				lhs.append(' ');
			}
		}
		lhs.append(')');
	}
	
	/* Stores sym at slots[pos], space-separating it from a symbol already stored there. */
	private void addRhs(String[] slots, int pos, String sym){
		if(slots[pos]==null){
			slots[pos] = sym;
		}else{
			slots[pos] += " "+ sym;
		}
	}
	
	/* Emits the lhs fragment for this node and registers its rhs symbol:
	 *  - frontier node: becomes variable xN, placed at its span start;
	 *  - pre-terminal without span: its word goes to the unaligned list;
	 *  - pre-terminal with span: its word is placed at its span start;
	 *  - anything else: recurse into the children. */
	private void deriveSubrule(int[] x_id, StringBuilder lhs, String[] v_rhs_frags, String[] v_rhs_unaligned){
		if(this.isFrontier){//note: pre-terminal can be frontier node
			lhs.append("(x").append(x_id[0]).append(':').append(name).append(" f)");
			addRhs(v_rhs_frags, (Integer)span.get(0), "x"+x_id[0]);
			x_id[0]++;
		}else if(terminalSymbol!=null && !terminalSymbol.isEmpty()){//pre-terminal, children are special
			//compare by content: a reference comparison against "" would misclassify
			//a non-interned empty string as a real terminal
			if(span==null){
				lhs.append('(').append(name).append(" (").append(terminalSymbol).append(" n))");
				//NOTE(review): the slot is always the last one (v_rhs_frags.length), so unaligned
				//words end up after the aligned symbols - confirm this is the intended placement
				addRhs(v_rhs_unaligned, v_rhs_frags.length, terminalSymbol);//remember how many continuous span before me
			}else{//non-frontier pre-terminal
				/*TODO: now, we sort the words according to their start span, this may not be good for non-continuous translation
				 * for example, "a prominent role" <=> W1(prominent) W2(a role) may need a rule "(NP (ADJP (JJ (W1 f))) (NP (NN (W2 f)))) => W1 W2"
				 * (W1/W2 stand for source words whose original characters were lost to an encoding error)
				 * problem is due to 1-to-m non-continuous translation*/
				lhs.append('(').append(name).append(" (").append(terminalSymbol).append(" f))");
				addRhs(v_rhs_frags, (Integer)span.get(0), terminalSymbol);
			}
		}else{//call my children
			ctrlDeriveSubrule(x_id, lhs, v_rhs_frags, v_rhs_unaligned);
		}
	}
	
}