package joshua.discriminative.syntax_reorder; import joshua.corpus.vocab.SymbolTable; /* Zhifei Li, <zhifei.work@gmail.com> * Johns Hopkins University */ //read the alignment into a hashtable, provide lookup, and min and max span //assumption: positions start from zero, alingment link is: frenchpos-englishpos public class Alignment { public int[] french_wrds; public int[] english_wrds; public int[][] alignment_matrix; public int[] num_alignments_infor_for_french;//how many eng words are aligned to each french wrd public int[] num_alignments_infor_for_english;//how many french words are aligned to each eng wrd public int[] min_pos_infor_for_french; //the min pos in english that aligned to each french wrd public int[] max_pos_infor_for_french; public int[] min_pos_infor_for_english; public int[] max_pos_infor_for_english; SymbolTable symbolTable = null; //TODO public Alignment(String french_str, String english_str, String align_str){ String[] french_wrds1 = french_str.split("\\s+"); french_wrds = new int[french_wrds1.length]; for(int i=0; i<french_wrds1.length; i++) french_wrds[i] = symbolTable.addTerminal(french_wrds1[i]); String[] english_wrds1 = english_str.split("\\s+"); english_wrds = new int[english_wrds1.length]; for(int i=0; i<english_wrds1.length; i++) english_wrds[i] = symbolTable.addTerminal(english_wrds1[i]); alignment_matrix = new int[french_wrds.length][english_wrds.length]; num_alignments_infor_for_french = new int[french_wrds.length]; num_alignments_infor_for_english = new int[english_wrds.length]; min_pos_infor_for_french = new int[french_wrds.length]; for(int t=0; t<min_pos_infor_for_french.length; t++) min_pos_infor_for_french[t]=english_wrds.length; max_pos_infor_for_french = new int[french_wrds.length]; for(int t=0; t<max_pos_infor_for_french.length; t++) max_pos_infor_for_french[t]=-1; min_pos_infor_for_english = new int[english_wrds.length]; for(int t=0; t<min_pos_infor_for_english.length; t++) min_pos_infor_for_english[t]=french_wrds.length; max_pos_infor_for_english = new int[english_wrds.length]; for(int t=0; t<max_pos_infor_for_english.length; t++) max_pos_infor_for_english[t]=-1; String[] links = align_str.split("\\s+"); for(int i=0; i < links.length; i++){ String[] ids = links[i].split("-"); int p_fr = new Integer(ids[0]); int p_eng = new Integer(ids[1]); if( p_fr<0 || p_fr >= french_wrds.length || p_eng<0 || p_eng >= english_wrds.length){ System.out.println("alignment information error, compared with the number of wrds"); System.exit(0); } alignment_matrix[p_fr][p_eng]=1; num_alignments_infor_for_french[p_fr]++; num_alignments_infor_for_english[p_eng]++; min_pos_infor_for_french[p_fr]=min(p_eng, min_pos_infor_for_french[p_fr]); max_pos_infor_for_french[p_fr]=max(p_eng, max_pos_infor_for_french[p_fr]); min_pos_infor_for_english[p_eng]=min(p_fr, min_pos_infor_for_english[p_eng]); max_pos_infor_for_english[p_eng]=max(p_fr, max_pos_infor_for_english[p_eng]); } } public static int max(int i, int j){ return (i>=j)?i:j; } public static int min(int i, int j){ return (i<=j)?i:j; } }