/**
*
*/
package edu.berkeley.nlp.PCFGLA;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import edu.berkeley.nlp.PCFGLA.ConditionalTrainer.Options;
/**
 * Command-line driver that chains the individual training tools into one
 * pipeline: it trains an x-bar generative baseline grammar, computes pruning
 * constraints with it, trains the unsplit conditional grammar and then
 * repeatedly trains split grammars and recomputes constraints with each of them.
 * <p>
 * Every step is skipped when its output file already exists, unless
 * {@code opts.dontLoad} is set, so an interrupted run can be resumed.
 *
 * @author petrov
 */
public class ConditionalPipeline {

    /**
     * When true, "-initializeZero" is passed to the first conditional training
     * step (only used when no initialization directory is given).
     */
    public static boolean initializeWithZero = true;

    /** Prefix shared by all grammar and constraint files written by the pipeline. */
    private static final String BASE_NAME = "split_";

    /** Number of split iterations run by the main loop. */
    private static final int NUM_SPLITS = 6;

    /**
     * Runs the whole pipeline and terminates the JVM with exit code 0.
     *
     * @param args command-line arguments, parsed as {@link ConditionalTrainer.Options};
     *             they are also forwarded to most of the sub-tools, with the
     *             step-specific options appended after them
     */
    public static void main(String[] args) {
        OptionParser optParser = new OptionParser(ConditionalTrainer.Options.class);
        Options opts = (Options) optParser.parse(args, true);
        // provide feedback on command-line arguments
        // System.out.println("Calling with " + optParser.getPassedInOptions());

        String dirName = opts.outDir;
        File directory = new File(dirName);
        // An already existing directory is the normal case when resuming a run,
        // so only complain when the directory is missing and cannot be created.
        if (!directory.isDirectory() && !directory.mkdirs()) {
            System.out.println("Failed to make directory.");
        }

        String splitPrefix = dirName + "/" + BASE_NAME;

        if (opts.initializeDir == null) {
            // first train an x-bar generative grammar
            String baselineGrammar = dirName + "/" + "base_gen.gr";
            File baselineFile = new File(baselineGrammar);
            if (!baselineFile.exists() || opts.dontLoad) {
                ConditionalTrainer.main(buildBaselineArgs(opts, baselineGrammar));
            } else {
                reportSkip(baselineFile);
            }

            // now compute constraints with x-bar generative grammar
            File constraintFile = new File(splitPrefix + "0-0.data");
            if (constraintFile.exists() && !opts.dontLoad) {
                reportSkip(constraintFile);
            } else {
                computeConstraints(args, null, splitPrefix + "0", baselineGrammar);
            }
        }

        // then train an x-bar generative grammar with the simple lexicon
        File unsplitGrammar = new File(splitPrefix + "0.gr");
        List<String> unsplitOptions = new ArrayList<String>();
        unsplitOptions.addAll(Arrays.asList("-out", unsplitGrammar.toString()));
        if (opts.initializeDir != null) {
            unsplitOptions.addAll(Arrays.asList(
                    "-cons", opts.initializeDir + "/" + BASE_NAME + "0",
                    "-in", opts.initializeDir + "/" + BASE_NAME + "0.gr"));
        } else {
            unsplitOptions.addAll(Arrays.asList("-cons", splitPrefix + "0"));
            // Only pass the flag when it is requested instead of an empty argument.
            if (initializeWithZero) {
                unsplitOptions.add("-initializeZero");
            }
        }
        unsplitOptions.addAll(Arrays.asList("-doNOTprojectConstraints", "-noSplit", "-doConditional"));
        String[] baselineCondArgs = addOptions(args, toArray(unsplitOptions));

        if (!unsplitGrammar.exists() || opts.dontLoad) {
            ConditionalTrainer.main(baselineCondArgs);
            if (opts.testAll) {
                testAllGrammars(opts, splitPrefix + "0_dev-0.data", BASE_NAME + "0.gr", true);
            }
        } else {
            reportSkip(unsplitGrammar);
        }

        // loop:
        for (int split = 1; split <= NUM_SPLITS; split++) {
            System.out.println("\n\nIn " + split + ". Split-Iteration.");
            String previousGrammar = splitPrefix + (split - 1);
            String currentGrammar = splitPrefix + split;

            // split grammar and train it
            File grammarFile = new File(currentGrammar + ".gr");
            String[] trainArgs;
            if (opts.initializeDir == null) {
                trainArgs = addOptions(args, new String[] {
                        "-in", previousGrammar + ".gr", "-doConditional",
                        "-cons", previousGrammar, "-out", grammarFile.toString() });
            } else {
                // start from the already split grammar of the initialization directory
                trainArgs = addOptions(args, new String[] {
                        "-in", opts.initializeDir + "/" + BASE_NAME + split + ".gr",
                        "-doConditional", "-noSplit",
                        "-cons", opts.initializeDir + "/" + BASE_NAME + (split - 1),
                        "-out", grammarFile.toString() });
            }

            if (!grammarFile.exists() || opts.dontLoad) {
                ConditionalTrainer.main(trainArgs);
                if (opts.testAll) {
                    testAllGrammars(opts, previousGrammar + "_dev-0.data", BASE_NAME + split + ".gr", false);
                }
            } else {
                reportSkip(grammarFile);
            }

            // compute constraints with new grammar
            if (opts.initializeDir == null) {
                File constraintFile = new File(currentGrammar + "-0.data");
                if (constraintFile.exists() && !opts.dontLoad) {
                    reportSkip(constraintFile);
                } else {
                    computeConstraints(args, previousGrammar, currentGrammar, currentGrammar + ".gr");
                }
            }
        }
        System.exit(0);
    }

    /**
     * Builds the arguments for training the generative baseline grammar.
     *
     * @param opts            the parsed pipeline options
     * @param baselineGrammar path the baseline grammar is written to
     * @return the argument array for {@code ConditionalTrainer.main}
     */
    private static String[] buildBaselineArgs(Options opts, String baselineGrammar) {
        List<String> baselineArgsList = new ArrayList<String>(Arrays.asList(new String[] {
                "-path", opts.path, "-trfr", "" + opts.trainingFractionToKeep,
                "-treebank", opts.treebank + "", "-out", baselineGrammar,
                "-baseline", "-maxL", opts.maxL + "", "-b", opts.binarization + "" }));
        if (opts.markUnaryParents) {
            baselineArgsList.add("-markUnaryParents");
        }
        // Each flag is forwarded only when its own option is set.
        if (opts.filterStupidFrickinWHNP) {
            baselineArgsList.add("-filterStupidFrickinWHNP");
        }
        if (opts.collapseUnaries) {
            baselineArgsList.add("-collapseUnaries");
        }
        return toArray(baselineArgsList);
    }

    /**
     * Computes constraints with the given grammar for the training, the "dev"
     * and the "final" section, always in that order.
     *
     * @param args        the original command-line arguments to forward
     * @param consPrefix  prefix of the constraints of the previous level, or
     *                    {@code null} when no "-cons" option should be passed
     * @param outPrefix   prefix of the constraint files to be written
     * @param grammarFile grammar passed via "-in"
     */
    private static void computeConstraints(String[] args, String consPrefix, String outPrefix, String grammarFile) {
        runConstrainer(args, consPrefix, outPrefix, grammarFile, "", null);
        runConstrainer(args, consPrefix, outPrefix, grammarFile, "_dev", "dev");
        runConstrainer(args, consPrefix, outPrefix, grammarFile, "_test", "final");
    }

    /**
     * Runs {@code ParserConstrainer.main} once.
     *
     * @param args        the original command-line arguments to forward
     * @param consPrefix  prefix of the input constraints, or {@code null} for none
     * @param outPrefix   prefix of the output constraints
     * @param grammarFile grammar passed via "-in"
     * @param suffix      appended to both prefixes ("" / "_dev" / "_test")
     * @param section     value for "-section" (then also "-nChunks 1" is passed),
     *                    or {@code null} to pass neither option
     */
    private static void runConstrainer(String[] args, String consPrefix, String outPrefix,
            String grammarFile, String suffix, String section) {
        List<String> extra = new ArrayList<String>();
        if (consPrefix != null) {
            extra.addAll(Arrays.asList("-cons", consPrefix + suffix));
        }
        extra.addAll(Arrays.asList("-out", outPrefix + suffix, "-in", grammarFile));
        if (section != null) {
            extra.addAll(Arrays.asList("-section", section, "-nChunks", "1"));
        }
        extra.addAll(Arrays.asList("-outputLog", outPrefix + suffix + ".cons.log"));
        ParserConstrainer.main(addOptions(args, toArray(extra)));
    }

    /**
     * Calls {@code GrammarTester.main} with the "-testAll" option.
     *
     * @param opts                  the parsed pipeline options
     * @param devConstraints        constraint file passed via "-cons"
     * @param grammarName           grammar file name passed via "-in"; the
     *                              directory is passed separately via "-filePath"
     * @param doNotProjectConstraints whether to prepend "-doNOTprojectConstraints"
     */
    private static void testAllGrammars(Options opts, String devConstraints, String grammarName,
            boolean doNotProjectConstraints) {
        System.out.println("Testing all grammars to determine which one was the best and should be split next");
        String[] testArgs = new String[] {
                "-cons", devConstraints, "-testAll", "-path", opts.path, "-in", grammarName,
                "-filePath", opts.outDir, "-treebank", opts.treebank + "", "-maxL", opts.maxL + "",
                "-parser", "plain", "-nProcess", opts.nProcess + "" };
        if (doNotProjectConstraints) {
            testArgs = addOptions(new String[] { "-doNOTprojectConstraints" }, testArgs);
        }
        GrammarTester.main(testArgs);
    }

    /** Prints the message used whenever a step is skipped because its output exists. */
    private static void reportSkip(File existing) {
        System.out.println("Skipping this step since " + existing.toString() + " already exists.");
    }

    /** Copies the list into a new String array. */
    private static String[] toArray(List<String> values) {
        return values.toArray(new String[values.size()]);
    }

    /**
     * Concatenates two argument arrays.
     *
     * @param a the leading arguments
     * @param b the arguments appended after {@code a}
     * @return a new array holding the elements of {@code a} followed by those of {@code b}
     */
    private static String[] addOptions(String[] a, String[] b) {
        String[] res = new String[a.length + b.length];
        System.arraycopy(a, 0, res, 0, a.length);
        System.arraycopy(b, 0, res, a.length, b.length);
        return res;
    }
}