package joshua.discriminative.training.contrastive_estimation;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import joshua.discriminative.FileUtilityOld;
/**
 * Command-line tool that turns a grammar file into a feature file.
 *
 * <p>Each grammar line is expected to look like
 * {@code lhs ||| foreign ||| english ||| feature values}. For every line a
 * feature key is built from the selected fields (each followed by
 * {@code " ||| "}); duplicate keys are collapsed. Every distinct key is then
 * written to the feature file, immediately followed by the initial weight.
 *
 * <p>Arguments, in order: {@code grammarFile featureFile includeLHS
 * includeForeign includeEnglish}. The three flags are parsed with
 * {@link Boolean#parseBoolean(String)}, i.e. only a case-insensitive
 * {@code "true"} enables a field.
 */
public class ConvertGrammarToFeatureFile {

    /** Weight written after every feature key. */
    private static final double INIT_WEIGHT = 0;

    /** Text appended after each selected field inside a feature key. */
    private static final String KEY_SEPARATOR = " ||| ";

    /** Grammar field separator: three pipes surrounded by whitespace. Compiled once, not per line. */
    private static final Pattern FIELD_SEPARATOR = Pattern.compile("\\s+\\|{3}\\s+");

    /**
     * Reads the grammar, collects the distinct feature keys and writes the feature file.
     *
     * @param args grammarFile, featureFile, includeLHS, includeForeign, includeEnglish
     * @throws IllegalArgumentException if a grammar line has fewer fields than the
     *         selected flags require
     */
    public static void main(String[] args) {
        if (args.length < 5) {
            System.err.println("Usage: java " + ConvertGrammarToFeatureFile.class.getName()
                    + " grammarFile featureFile includeLHS includeForeign includeEnglish");
            System.exit(1);
        }

        String grammarFile = args[0];
        String featureFile = args[1];
        boolean includeLHS = Boolean.parseBoolean(args[2]);
        boolean includeForeign = Boolean.parseBoolean(args[3]);
        boolean includeEnglish = Boolean.parseBoolean(args[4]);

        // Only the fields that are actually selected have to be present on a line.
        int requiredFields = includeEnglish ? 3 : (includeForeign ? 2 : (includeLHS ? 1 : 0));

        //== get the hash table
        HashMap<String, Double> featureTbl = new HashMap<String, Double>();
        int numLinesInGrammar = 0;
        BufferedReader reader = FileUtilityOld.getReadFileStream(grammarFile, "UTF-8");
        try {
            String line;
            while ((line = FileUtilityOld.readLineLzf(reader)) != null) {
                numLinesInGrammar++;
                String[] fds = FIELD_SEPARATOR.split(line); // lhs ||| foreign ||| english ||| feature values
                if (fds.length < requiredFields) {
                    throw new IllegalArgumentException("Malformed grammar rule at line "
                            + numLinesInGrammar + " of " + grammarFile + " (expected at least "
                            + requiredFields + " fields, found " + fds.length + "): " + line);
                }
                featureTbl.put(buildFeatureKey(fds, includeLHS, includeForeign, includeEnglish), INIT_WEIGHT);
            }
        } finally {
            // Release the reader even when a line cannot be processed.
            FileUtilityOld.closeReadFile(reader);
        }

        //== write the feature file
        BufferedWriter writer = FileUtilityOld.getWriteFileStream(featureFile, "UTF-8");
        try {
            for (Map.Entry<String, Double> feature : featureTbl.entrySet()) {
                // The key already ends with " ||| ", so the weight follows it directly.
                FileUtilityOld.writeLzf(writer, feature.getKey() + feature.getValue() + "\n");
            }
        } finally {
            FileUtilityOld.closeWriteFile(writer);
        }

        System.out.println("numLinesInGrammar= " + numLinesInGrammar);
        System.out.println("numFeature= " + featureTbl.size());
    }

    /** Concatenates the selected grammar fields, each followed by {@code " ||| "}, into one feature key. */
    private static String buildFeatureKey(String[] fds, boolean includeLHS,
            boolean includeForeign, boolean includeEnglish) {
        StringBuilder featKey = new StringBuilder();
        if (includeLHS) {
            featKey.append(fds[0]).append(KEY_SEPARATOR);
        }
        if (includeForeign) {
            featKey.append(fds[1]).append(KEY_SEPARATOR);
        }
        if (includeEnglish) {
            featKey.append(fds[2]).append(KEY_SEPARATOR);
        }
        return featKey.toString();
    }
}