package joshua.discriminative.monolingual_parser;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;

import joshua.util.FileUtility;

/**
 * Command-line tool that rewrites a bilingual grammar file as a monolingual one.
 *
 * <p>Each input line is expected to have the form
 * {@code [x] ||| cn ||| en ||| feature-scores}. For every rule the second
 * field is dropped, and the whitespace-separated feature scores are collapsed
 * into a single number: the dot product of the scores with the feature weights
 * given on the command line. The output line is
 * {@code [x] ||| en ||| combinedScore}.
 *
 * <p>Only the first occurrence of a given {@code [x] ||| en} pair is written;
 * later occurrences are reported on standard output and skipped.
 */
public class BilingualToMonolingualGrammar {

	/** Separator between the fields of a rule line: {@code |||} surrounded by whitespace. */
	private static final String FIELD_SEPARATOR_REGEX = "\\s+\\|{3}\\s+";

	/** Separator written between the fields of an output rule. */
	private static final String OUTPUT_FIELD_SEPARATOR = " ||| ";

	/**
	 * Converts the bilingual grammar to a monolingual grammar.
	 *
	 * @param args {@code args[0]} is the bilingual grammar file to read,
	 *             {@code args[1]} is the monolingual grammar file to write, and
	 *             {@code args[2..]} are the feature weights (one per feature
	 *             score present in each rule)
	 * @throws IOException if reading the input or writing the output fails
	 * @throws NumberFormatException if a weight or a feature score is not a valid number
	 */
	public static void main(String[] args) throws IOException {
		if (args.length < 3) {
			System.out.println("Wrong number of parameters, it must have at least three parameters: "
				+ "java BilingualToMonolingualGrammar bilingualGrammarFile monolingualGrammarFile "
				+ "featureWeight1 [featureWeight2 ...]");
			System.exit(1);
		}

		String bilingualGrammarFile = args[0];
		String monolingualGrammarFile = args[1];

		ArrayList<Double> featureWeights = new ArrayList<Double>();
		for (int i = 2; i < args.length; i++) {
			featureWeights.add(Double.valueOf(args[i]));
		}
		int numFeatures = featureWeights.size();

		// Keys are "[x] ||| en"; presence of a key means the rule was already written.
		HashMap<String, Boolean> seenRules = new HashMap<String, Boolean>();

		// Set when a rule carries the wrong number of scores; the process exits
		// with status 1 only after both streams have been closed, so that rules
		// already written are not lost in an unflushed buffer.
		boolean featureCountMismatch = false;

		BufferedReader t_reader = FileUtility.getReadFileStream(bilingualGrammarFile);
		try {
			BufferedWriter t_writer = FileUtility.getWriteFileStream(monolingualGrammarFile);
			try {
				String line;
				while ((line = FileUtility.read_line_lzf(t_reader)) != null) {
					// [x] ||| cn ||| en ||| feature-scores
					String[] fds = line.split(FIELD_SEPARATOR_REGEX);
					if (fds.length != 4) {
						System.out.println("rule line does not have four fds; " + line);
						if (fds.length < 4) {
							// Not enough fields to build a rule; indexing fds[2]/fds[3]
							// would throw, so skip the line.
							continue;
						}
					}

					// fds[1] is skipped on purpose: we do not care about the order
					// index of the non-terminal in the rule.
					String ruleKey = fds[0] + OUTPUT_FIELD_SEPARATOR + fds[2];

					if (seenRules.containsKey(ruleKey)) {
						System.out.println("duplicate rule: " + ruleKey);
						continue;
					}
					seenRules.put(ruleKey, Boolean.TRUE);

					String[] scores = fds[3].split("\\s+");
					if (scores.length != numFeatures) {
						System.out.println("number of features in rule is not " + numFeatures);
						featureCountMismatch = true;
						break;
					}

					double combinedScore = 0;
					for (int j = 0; j < scores.length; j++) {
						combinedScore += Double.parseDouble(scores[j]) * featureWeights.get(j).doubleValue();
					}

					t_writer.write(ruleKey + OUTPUT_FIELD_SEPARATOR + combinedScore + "\n");
				}
			} finally {
				t_writer.close();
			}
		} finally {
			t_reader.close();
		}

		if (featureCountMismatch) {
			System.exit(1);
		}
	}
}