package edu.stanford.nlp.parser.ensemble;
import edu.stanford.nlp.parser.ensemble.utils.Now;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.commons.io.FileUtils;
import org.maltparser.core.helper.SystemLogger;
public class RunnableTrainJob extends BaseModelRunnableJob implements Runnable {
public RunnableTrainJob(Ensemble ensemble, int index) {
super(ensemble, index);
}
@Override
public void run() {
try {
SystemLogger.logger().info("Starting job " + ensemble.baseModels[baseModelIndex] + " at " + new Now() + "...\n");
createWorkingDirectory();
// args for malt
String params = makeMaltEngineParameters();
// run malt
Process p = Runtime.getRuntime().exec("java -Xmx2048m -jar lib" + File.separator + "maltParser.jar " + params);
BufferedReader stdError = new BufferedReader(new InputStreamReader(
p.getErrorStream()));
String s;
if (ensemble.multiThreadTrain == true) {
while ((s = stdError.readLine()) != null) {
System.out.println(ensemble.baseModels[baseModelIndex] + ") " + s);
}
}
else {
while ((s = stdError.readLine()) != null) {
System.out.println(s);
}
}
// move model file from working directory to model directory
File origModel = new File(workingDirectory + File.separator + ensemble.modelName + "-" + baseModel + ".mco");
File savedModel = new File(ensemble.modelDirectory + File.separator + ensemble.modelName + "-" + baseModel + ".mco");
try {
FileUtils.copyFile(origModel, savedModel);
SystemLogger.logger().info("Model file for job " + baseModel + " saved as: " + savedModel.getAbsolutePath() + "\n");
origModel.delete();
} catch(Exception ex) {
SystemLogger.logger().error("ERROR: failed to save model file for job " + baseModel + ". The actual model file might be here: " + origModel.getAbsolutePath() + "\n");
}
ensemble.threadFinished();
SystemLogger.logger().info("Ended job " + baseModel + " at " + new Now() + ".\n");
SystemLogger.logger().info("-----------------------------------------------------------------------------\n");
}
catch (IOException e) {
e.printStackTrace();
}
}
private String makeMaltEngineParameters() {
StringBuilder pars = new StringBuilder();
// flowchart: Flow chart
// -learn [Learn a Single MaltParser configuration]
pars.append("-m ");
pars.append("learn");
// name: Configuration name
pars.append(" -c ");
pars.append(ensemble.modelName).append("-").append(baseModel);
// learner: Learner
// - libsvm [LIBSVM learner]
// - liblinear [LIBLINEAR learner]
pars.append(" -l ");
pars.append("liblinear");
// options: LIBLINEAR options (see liblinear Documentation)
pars.append(" -lo ");
pars.append(ensemble.libLinearOptions);
// verbosity: Verbosity of the liblinear or the libsvm package
// - silent [No output from the liblinear or the libsvm package is logged.]
// - error [Only the error stream of the liblinear or the libsvm package is logged.]
// - all [All output of the liblinear or the libsvm package is logged.]
pars.append(" -lv ");
pars.append(ensemble.libLinearLogLevel);
// verbosity: Verbosity level
// - off [Logging turned off]
// - fatal [Logging of very severe error events]
// - error [Logging of error events]
// - warn [Logging of harmful situations]
// - info [Logging of informational messages]
// - debug [Logging of debugging messages]
pars.append(" -v ");
pars.append(ensemble.logLevel);
// data_split_column: Data split input column
pars.append(" -d ");
pars.append(ensemble.dataSplitColumn);
// data_split_threshold: Data split threshold [Default = 50]
pars.append(" -T ");
pars.append(Integer.toString(ensemble.dataSplitThreshold));
// data_split_structure: Data split data structure
pars.append(" -s ");
if (baseModel.startsWith("nivre")) {
pars.append("Input[0]");
} else if (baseModel.startsWith("cov")) {
pars.append("Right[0]");
} else {
throw new RuntimeException("Unknown base model: " + baseModel);
}
// parsing_algorithm: Parsing algorithm
// - nivreeager [Nivre arc-eager]
// - nivrestandard [Nivre arc-standard]
// - covnonproj [Covington non-projective]
// - covproj [Covington projective]
// - stackproj [Stack projective]
// - stackeager [Stack eager]
// - stacklazy [Stack lazy]
// - planar [Planar eager]
// - 2planar [2-Planar eager]
pars.append(" -a ");
// remove (*-ltr) or (*-rtl) from end of algorithm name
int dashPos = baseModel.lastIndexOf("-");
assert (dashPos > 0 && dashPos < baseModel.length());
pars.append(baseModel.substring(0, dashPos));
// workingdir: Working directory
pars.append(" -w ");
pars.append(workingDirectory.getAbsolutePath());
// infile: Path to input file
pars.append(" -i ");
if (leftToRight) {
if (pseudo_projective) {
// pseudo-projective trainCorpus
File origFile = new File(ensemble.trainCorpus);
File ppFile = new File(ensemble.workingDirectory + File.separator + origFile.getName() + ".pp");
pars.append(ppFile.getAbsolutePath());
}
else
pars.append(ensemble.trainCorpus);
} else {
// Reverse trainCorpus
File origFile = new File(ensemble.trainCorpus);
if (pseudo_projective) {
File ppReversedFile = new File(ensemble.workingDirectory + File.separator + origFile.getName() + ".pp.reversed");
pars.append(ppReversedFile.getAbsolutePath());
}
else {
File reversedFile = new File(ensemble.workingDirectory + File.separator + origFile.getName() + ".reversed");
pars.append(reversedFile.getAbsolutePath());
}
}
// external: Path to train or svm-train
if (ensemble.libLinearTrain != null && ensemble.libLinearTrain.length() > 0) {
pars.append(" -lx ");
pars.append(ensemble.libLinearTrain);
}
// features: Feature model specification
if (Const.TRAIN_EXTENDED) {
pars.append(" -F ");
if (featureModel.equals("<default>")) {
if (baseModel.startsWith("nivreeager-")) {
pars.append("nivreeager");
} else if (baseModel.startsWith("nivrestandard-")) {
pars.append("nivrestandard");
} else if (baseModel.startsWith("covnonproj-")) {
pars.append("covnonproj");
} else {
throw new RuntimeException("Unknown base model: " + baseModel);
}
}
else {
pars.append(ensemble.workingDirectory).append(File.separator).append(featureModel);
}
}
return pars.toString();
}
}