/**
* Converts a non-projective corpus to a pseudo-projective one.
*/
package edu.stanford.nlp.parser.ensemble.utils;
import java.io.*;
import org.maltparser.core.exception.MaltChainedException;
/**
 * Runs MaltParser in projectivize mode ({@code -m proj}) as a child
 * {@code java} process in order to rewrite a corpus file.
 *
 * <p>The child is started as {@code java -Xmx2048m -jar lib/maltParser.jar ...};
 * the {@code java} executable and the relative {@code lib} directory are
 * resolved by the operating system from the environment and current
 * directory of the calling JVM.
 *
 * @author Mojtaba Khallash
 */
public class ProjectivizeCorpus {

    /** Stream that receives every console line printed by the child process. */
    public static PrintStream out = System.out;

    /**
     * Projectivizes {@code input} using marking strategy {@code "head"},
     * covered root {@code "head"} and lifting order {@code "shortest"}.
     *
     * @param workingDirectory directory that contains {@code input}, receives
     *                         {@code Output}, and is passed to MaltParser as {@code -w}
     * @param input            name of the input file inside {@code workingDirectory}
     * @param Output           name of the output file inside {@code workingDirectory}
     * @param model            configuration name passed to MaltParser as {@code -c}
     * @throws MaltChainedException declared for callers; not raised by this class itself
     * @throws IOException          if the child process cannot be started, its output
     *                              cannot be read, or the wait for it is interrupted
     */
    public static void Projectivize(String workingDirectory, String input,
            String Output, String model) throws MaltChainedException,
            IOException {
        Projectivize(workingDirectory, input, Output, model, "head");
    }

    /**
     * Projectivizes {@code input} with the given marking strategy, covered
     * root {@code "head"} and lifting order {@code "shortest"}.
     *
     * @param workingDirectory directory that contains {@code input}, receives
     *                         {@code Output}, and is passed to MaltParser as {@code -w}
     * @param input            name of the input file inside {@code workingDirectory}
     * @param Output           name of the output file inside {@code workingDirectory}
     * @param model            configuration name passed to MaltParser as {@code -c}
     * @param markingStrategy  value for {@code -pp}
     *                         (none, baseline, head, path, head+path)
     * @throws MaltChainedException declared for callers; not raised by this class itself
     * @throws IOException          if the child process cannot be started, its output
     *                              cannot be read, or the wait for it is interrupted
     */
    public static void Projectivize(String workingDirectory, String input,
            String Output, String model, String markingStrategy)
            throws MaltChainedException, IOException {
        Projectivize(workingDirectory, input, Output, model, markingStrategy, "head");
    }

    /**
     * Projectivizes {@code input} with the given marking strategy and covered
     * root, using lifting order {@code "shortest"}.
     *
     * @param workingDirectory directory that contains {@code input}, receives
     *                         {@code Output}, and is passed to MaltParser as {@code -w}
     * @param input            name of the input file inside {@code workingDirectory}
     * @param Output           name of the output file inside {@code workingDirectory}
     * @param model            configuration name passed to MaltParser as {@code -c}
     * @param markingStrategy  value for {@code -pp}
     *                         (none, baseline, head, path, head+path)
     * @param coveredRoot      value for {@code -pcr}
     *                         (none, left, right, head, ignore)
     * @throws MaltChainedException declared for callers; not raised by this class itself
     * @throws IOException          if the child process cannot be started, its output
     *                              cannot be read, or the wait for it is interrupted
     */
    public static void Projectivize(String workingDirectory, String input,
            String Output, String model, String markingStrategy,
            String coveredRoot) throws MaltChainedException, IOException {
        Projectivize(workingDirectory, input, Output, model, markingStrategy, coveredRoot, "shortest");
    }

    /**
     * Projectivizes {@code input} by launching MaltParser in a child JVM and
     * blocking until that process has terminated. Everything the child prints
     * (standard output and standard error) is forwarded line by line to
     * {@link #out}; a non-zero exit code is reported on {@link #out} as well.
     *
     * @param workingDirectory directory that contains {@code input}, receives
     *                         {@code Output}, and is passed to MaltParser as {@code -w}
     * @param input            name of the input file inside {@code workingDirectory}
     * @param Output           name of the output file inside {@code workingDirectory}
     * @param model            configuration name passed to MaltParser as {@code -c}
     * @param markingStrategy  value for {@code -pp}
     *                         (none, baseline, head, path, head+path)
     * @param coveredRoot      value for {@code -pcr}
     *                         (none, left, right, head, ignore)
     * @param liftingOrder     value for {@code -plo} (shortest, deepest)
     * @throws MaltChainedException declared for callers; not raised by this class itself
     * @throws IOException          if the child process cannot be started, its output
     *                              cannot be read, or the wait for it is interrupted
     */
    public static void Projectivize(String workingDirectory, String input,
            String Output, String model, String markingStrategy,
            String coveredRoot, String liftingOrder)
            throws MaltChainedException, IOException {
        String[] command = makeMaltEngineCommand(workingDirectory, input,
                Output, model, markingStrategy, coveredRoot, liftingOrder);

        ProcessBuilder builder = new ProcessBuilder(command);
        // Merge stderr into stdout so a single reader drains both pipes; an
        // unread pipe can fill up and block the child (and us) indefinitely.
        builder.redirectErrorStream(true);
        Process process = builder.start();

        BufferedReader console = new BufferedReader(
                new InputStreamReader(process.getInputStream()));
        try {
            String line;
            while ((line = console.readLine()) != null) {
                out.println(line);
            }
        } finally {
            console.close();
        }

        try {
            // End-of-stream does not guarantee the process has exited, so wait
            // explicitly before handing control back to the caller.
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                out.println("MaltParser exited with code " + exitCode);
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers and surface it through the
            // already-declared IOException channel.
            Thread.currentThread().interrupt();
            InterruptedIOException interrupted = new InterruptedIOException(
                    "Interrupted while waiting for MaltParser to finish");
            interrupted.initCause(e);
            throw interrupted;
        }
    }

    /**
     * Builds the complete command line for the MaltParser child process, one
     * array element per argument, so that values containing whitespace (for
     * example a working directory with a space in its path) stay intact.
     */
    private static String[] makeMaltEngineCommand(String workingDirectory, String input,
            String Output, String model, String markingStrategy,
            String coveredRoot, String liftingOrder) {
        return new String[] {
            "java", "-Xmx2048m",
            "-jar", "lib" + File.separator + "maltParser.jar",
            // Config Name = Pseudo-Projectivity
            "-c", model,
            // Processing Mode = Projectivize
            "-m", "proj",
            // Input Config [file path - format]
            "-i", workingDirectory + File.separator + input,
            // Output Config [file path - format]
            "-o", workingDirectory + File.separator + Output,
            // Marking Strategy [none, baseline, head, path, head+path]
            "-pp", markingStrategy,
            // Covered Root [none - left - right - head - ignore]
            "-pcr", coveredRoot,
            // Lifting Order [shortest - deepest]
            "-plo", liftingOrder,
            // Working Directory Path
            "-w", workingDirectory
        };
    }
}