package com.cognitionis.external_tools;
import java.io.*;
import com.cognitionis.utils_basickit.FileUtils;
/**
* REGULAR INSTALLATION AND INCLUSION IN PATH REQUIRED
* @author Hector Llorens
* @since 2011
*/
public class SRL_Roth {

    /** Directory, relative to the application path, that holds the SRL_Roth scripts. */
    private static final String program_path = FileUtils.getApplicationPath() + "program-data/SRL_Roth/";
    /** Perl client script; it is run through the {@code perl} interpreter found in PATH. */
    private static final String program_bin = program_path + "srl-client-primitive.pl";
    /** Converter executable that is invoked with {@code -f rothcomplete}. */
    private static final String program_bin2 = program_path + "roth_to_conll";

    /**
     * Runs SRL_Roth over plain text with tokenization enabled.
     * Recommended input: one sentence followed by a blank line (sentence\n\n).
     * The output is saved in a .roth file (PipesFile).
     *
     * Format Token|POS|...
     *
     * Equivalent to {@code run(filename, 1)}.
     *
     * @param filename path of the input text file
     * @return Output filename ({@code filename + ".roth"}), or {@code null} if an error occurred
     */
    public static String run(String filename) {
        return run(filename, 1);
    }

    /**
     * Runs SRL_Roth over a plain or tokenized file (tokens separated by one blank, one sentence per line)
     * and saves the output in a .roth file (PipesFile).
     *
     * tokenize: (0) do not tokenize, (1) tokenize
     *
     * Format Token|POS|...
     *
     * The work is done by a {@code /bin/sh} pipeline with these stages:
     * <ol>
     * <li>{@code cat} the input file;</li>
     * <li>{@code sed}: replace every {@code |} with {@code -} (the pipe is the output column separator);</li>
     * <li>{@code sed}: insert spaces around a hyphen located between two non-blank characters;</li>
     * <li>{@code iconv -c -t iso-8859-1}: convert to ISO-8859-1, dropping characters that cannot be converted;</li>
     * <li>the perl client, called with {@code tokenize} and a constant {@code 1}
     *     (the meaning of that second argument is not visible here - check the script);</li>
     * <li>{@code roth_to_conll -f rothcomplete};</li>
     * <li>{@code iconv -c -f iso-8859-1}: convert back from ISO-8859-1;</li>
     * <li>{@code sed}: replace lines that are empty or blank-only with a single {@code |}.</li>
     * </ol>
     * The standard error of the pipeline is read and discarded. The exit status of the pipeline is not checked.
     *
     * On any exception a message is printed to {@code System.err} and {@code null} is returned; if the
     * system property {@code DEBUG} equals "true" (case-insensitive) the stack trace is printed and the
     * JVM exits with status 1.
     *
     * @param filename path of the input file; must not be {@code null}
     * @param tokenize 0 = input is already tokenized, 1 = let the tool tokenize
     * @return Output filename ({@code filename + ".roth"}), or {@code null} if an error occurred
     */
    public static String run(String filename, int tokenize) {
        String outputfile = filename + ".roth";
        try {
            if (filename == null) {
                throw new IllegalArgumentException("filename is null");
            }
            // IMP: It is better to let UTF-8 fail than translate to crappy ISO that misses some chars
            // then things like " – " are impossible to pair because tokens are missing and there are spaces in the middle
            // IMP2: Since the other solution is even worse... we stay with ISO-translation and break if unpairable...
            // Every path is single-quoted for the shell so that blanks, quotes, '$' or backticks in it stay literal.
            String[] command = {"/bin/sh", "-c",
                "cat " + shellQuote(filename)
                + " | sed \"s/[|]/-/g\""
                + " | sed \"s/\\([^[:blank:]]\\)-\\([^[:blank:]]\\)/\\1 - \\2/g\""
                + " | iconv -c -t iso-8859-1"
                + " | perl " + shellQuote(program_bin) + " " + String.valueOf(tokenize) + " 1"
                + " | " + shellQuote(program_bin2) + " -f rothcomplete"
                + " | iconv -c -f iso-8859-1"
                + " | sed \"s/^[[:blank:]]*\\$/|/\""};
            Process p = Runtime.getRuntime().exec(command);
            BufferedWriter output = null;
            BufferedReader stdInput = null;
            try {
                // Nobody consumes stderr otherwise; a tool that fills the pipe buffer would block forever.
                discardInBackground(p.getErrorStream());
                // Platform default charset on both sides, as before: the last iconv has no -t option,
                // so it presumably emits the locale encoding - TODO confirm on the target system.
                output = new BufferedWriter(new FileWriter(outputfile));
                stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
                String line;
                while ((line = stdInput.readLine()) != null) {
                    output.write(line + "\n");
                }
                // Explicit close so that a failing final flush is reported instead of being swallowed below.
                output.close();
            } finally {
                // Each step is independent: one failing close must not prevent the remaining cleanup.
                closeQuietly(stdInput);
                closeQuietly(output);
                closeQuietly(p.getInputStream());
                closeQuietly(p.getOutputStream());
                closeQuietly(p.getErrorStream());
                p.destroy();
            }
        } catch (Exception e) {
            System.err.println("Errors found (SRL_Roth):\n\t" + e.toString());
            if (System.getProperty("DEBUG") != null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
                e.printStackTrace(System.err);
                System.exit(1);
            }
            return null;
        }
        return outputfile;
    }

    /** Wraps arg in single quotes for /bin/sh; an embedded single quote is written as '\''. */
    private static String shellQuote(String arg) {
        return "'" + arg.replace("'", "'\\''") + "'";
    }

    /** Closes a stream or reader/writer, ignoring null and any failure (cleanup paths only). */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // best-effort cleanup: there is nothing useful to do if closing fails
        }
    }

    /** Starts a daemon thread that reads the stream until EOF (or until it is closed) and discards the data. */
    private static void discardInBackground(final InputStream stream) {
        Thread drainer = new Thread(new Runnable() {
            public void run() {
                byte[] buffer = new byte[4096];
                try {
                    while (stream.read(buffer) != -1) {
                        // discard
                    }
                } catch (IOException ignored) {
                    // the stream was closed by the cleanup code or the process went away
                }
            }
        }, "SRL_Roth-stderr-drain");
        drainer.setDaemon(true);
        drainer.start();
    }
}