/*
* Created on : 07-11-2013
* Author : Bastian Weinlich
*/
package de.hpi.i2b2.girix;
import java.io.*;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.rosuda.REngine.REXP;
import org.rosuda.REngine.REXPLogical;
import org.rosuda.REngine.REXPMismatchException;
import org.rosuda.REngine.REngine;
import org.rosuda.REngine.REngineEvalException;
import org.rosuda.REngine.REngineException;
import org.rosuda.REngine.Rserve.RConnection;
import edu.harvard.i2b2.common.exception.I2B2Exception;
import de.hpi.i2b2.girix.GIRIXUtil;
// This class talks to R through an Rserve connection (org.rosuda.REngine.Rserve.RConnection); earlier revisions used the JRI (Java R Interface) library directly
public class JRIProcessor {
private static Log log = LogFactory.getLog(JRIProcessor.class);
private RConnection re = null;
private static StringBuffer Routput;
private static StringBuffer Rerrors;
private static final int port = 6311;
private static final String url = "127.0.0.1";
public JRIProcessor() throws I2B2Exception {
initializeR();
// Look if there's an existing R engine...
try {
re = new RConnection(url, port);
} catch (REngineException e) {
throw new I2B2Exception("Rserve not listening or connection refused");
}
/*// If not create a new one
if (re == null) {
log.info("Creating new R engine");
// Create new R engine but don't start main loop immediately (second argument)
re = (JRIEngine) JRIEngine.createEngine(args, new ScriptExecutorCallbackClass(), false);
} else {
log.info("R engine already exists");
} */
// Load required R package 'xtable'
try {
re.voidEval("library(xtable)");
} catch (REngineException e) {
throw new I2B2Exception("Package 'xtable' not installed");
}
}
public static void initializeR() throws I2B2Exception {
// Set some system settings that are required for running R
//GIRIXUtil.setUpREnvironment();
// Make sure we have the right version of everything
// boolean versionOK = Rengine.versionCheck();
// if (!versionOK) {
// log.error("R version error");
// throw new I2B2Exception("Error delivered from server: R version error");
// }
// Don't do/show unnecessary things (save/restore workspace etc.)
//String[] args = {"--vanilla", "-q"};
log.info("Starting R...");
if(!RserveSpawner.checkLocalRserve(port)) throw new I2B2Exception("Rserve failed to start");
}
// Do some preparation inside the R session for later output (plots, csvs, variables)
public File prepare(String webDirPath) throws I2B2Exception {
String plotDirPath = webDirPath + "/plots";
String csvDirPath = webDirPath + "/csv";
// Clear old output / errors
Routput = new StringBuffer(200);
Rerrors = new StringBuffer(200);
// ========= Plots =========
File f = new File(plotDirPath);
// Check if directory exists and if permissions are sufficient
if ( ! f.exists()) {
if (! f.mkdirs()) {
log.error("Error while creating plot directory");
throw new I2B2Exception("Error while creating plot directory");
}
}
if ( ! (f.exists() && f.isDirectory() && f.canRead() && f.canWrite()) ) {
log.error("Error with plot directory: " + plotDirPath);
throw new I2B2Exception("Error delivered from server: Plot directory not available");
}
// Clear old plot files
for (File plotfile : f.listFiles()) {
plotfile.delete();
}
// Set up R to save plots as svg files in the given plot directory
try {
re.voidEval("svg(\"" + plotDirPath + "/plot%03d.svg\")");
}
catch (REngineException e) {
log.error("Error while setting plot dir path in R");
throw new I2B2Exception("Error delivered from server: Setting plot directory path in R");
}
// ========= CSVs =========
File f2 = new File(csvDirPath);
// Check if directory exists and if permissions are sufficient
if ( ! f2.exists()) {
if (! f2.mkdirs()) {
log.error("Error while creating csv directory");
throw new I2B2Exception("Error while creating csv directory");
}
}
if ( ! (f2.exists() && f2.isDirectory() && f2.canRead() && f2.canWrite()) ) {
log.error("Error with csv directory: " + plotDirPath);
throw new I2B2Exception("Error delivered from server: CSV directory not available");
}
// Clear old csv files
for (File csvfile : f2.listFiles()) {
csvfile.delete();
}
// ========= Create data structures (vectors) =========
try {
re.voidEval("girix.patients <- c()");
re.voidEval("girix.observations <- c()");
re.voidEval("girix.input <- c()");
re.voidEval("girix.output <- list()");
re.voidEval("girix.concept.names <- c()");
re.voidEval("girix.modifiers <- c()");
re.voidEval("girix.events <- c()");
re.voidEval("girix.observers <- c()");
}
catch (REngineException e) {
log.error("Error with setting up new vectors in R");
throw new I2B2Exception("Error delivered from server: Creating vectors");
}
// ========= Handling dates and times =========
// Define an i2b2 DateTime Class, a helper function and a conversion function for the database DateTime string
// -> Time is also considered
try {
re.voidEval("setClass(\"i2b2DateTime\")");
re.voidEval("girix.swapPlusMinus <- function(x) if (!is.na(x)){if(x==\"-\") {\"+\"} else {\"-\"}}");
re.voidEval("setAs(\"character\",\"i2b2DateTime\", function(from){do.call(c,lapply(from, function(x) {as.POSIXlt(x, tz = paste(\"GMT\", girix.swapPlusMinus(substr(x,24,24)), substr(x,26,26), sep=\"\"), format=\"%Y-%m-%dT%H:%M:%S\")}))})");
} catch (REngineException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return f;
}
// Read in patient data
public void readDataFrameFromString(String name, GIRIXCSVContainer s, String colClasses) throws I2B2Exception {
// Uncomment for debugging purposes
// log.info(name + "\n\n" + s.getString());
// Case: No data available -> Initialize empty data.frame (read.table would cause an error otherwise) and return
if (!s.hasData()) {
String initStr = s.getString().replace(GIRIXUtil.SEP, "=character(),");
initStr = initStr.concat("=character()");
try {
re.voidEval(name + " <- data.frame(" + initStr + ")");
} catch (REngineException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return;
}
try {
re.voidEval("tmp <- " + s.getString());
} catch (REngineException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
try {
re.voidEval(name + " <- read.table(textConnection(tmp), sep=\"" + GIRIXUtil.SEP + "\", header=T, row.names=NULL, quote=\"\\\"\"," +
"colClasses = " + colClasses + ", na.string=c(\"\"))");
}
catch (REngineException e) {
log.error("Error reading in patient data into data.frame " + name);
throw new I2B2Exception("Error delivered from server: Reading in patient data");
}
try {
re.voidEval("rm(tmp)");
} catch (REngineException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
// Assign additional input parameters in R
public void assignAdditionalInput(Map<String, String> m) throws I2B2Exception {
// Assign additional input variables as strings
for (Map.Entry<String, String> entry : m.entrySet()) {
// Do some replacements in order to prevent errors and security flaws
String key = entry.getKey().replace("\\", "\\\\");
key = key.replace("\"", "\\\"");
String value = entry.getValue().replace("\\", "\\\\");
value = value.replace("\"", "\\\"");
try {
re.voidEval("girix.input[\"" + key + "\"] <- \"" + value + "\"");
}
catch (REngineException e) {
log.error("Error assigning additional inputs");
throw new I2B2Exception("Error delivered from server: Reading in additional input values");
}
}
}
// Make the names of the chosen concepts visible in R
public void assignConceptNames(String[] names) {
for (int i = 0; i < names.length; i++) {
String sanitized = names[i].replace("\\", "\\\\");
sanitized = sanitized.replace("\"", "\\\"");
try {
re.voidEval("girix.concept.names[" + (i+1) + "] <- \"" + sanitized + "\"");
} catch (REngineException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
public void setWorkingDirectory(String scriptletDirectoryPath) {
try {
re.voidEval("setwd(\"" + scriptletDirectoryPath + "\")");
} catch (REngineException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void executeRScript(String scriptPath) throws I2B2Exception {
try {
re.parseAndEval("source(\"" + scriptPath + "\", local=TRUE)");
} catch (REngineException | REXPMismatchException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public List<GIRIXOutputVariable> getOutputVariables(List<String[]> outputParametersList, String webPath) throws I2B2Exception {
// Array has 4 elements: Name, description, type, value
List<GIRIXOutputVariable> l = new LinkedList<GIRIXOutputVariable>();
// Get default output variables
int i = 1;
try {
while(true) {
getOrEval(re, "girix.output." + i);
String name = "girix.output." + (i); // Default name
GIRIXOutputVariable oV = new GIRIXOutputVariable(name, "", getType(name), extractResult(name, webPath + "/csv", name));
l.add(oV);
i++;
}
} catch(REXPMismatchException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (REngineException e) {
//Ignore because an exception will be thrown anyway
}
// Get custom (user defined) output variables
for (String[] oElement : outputParametersList) {
try {
// Replacements to prevent errors / security flaws
String oName = oElement[0].replace("\\", "\\\\");
oName = oName.replace("\"", "\\\"");
String Rname = "girix.output[[\"" + oName + "\"]]"; // Name to access output variable in R
getOrEval(re, Rname);
GIRIXOutputVariable oV = new GIRIXOutputVariable(
oElement[0],
oElement[1],
getType(Rname),
extractResult(Rname, webPath + "/csv", oName)
);
l.add(oV);
}
catch (REngineException | REXPMismatchException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
return l;
}
// Check if output is table-like
private String getType(String name) throws I2B2Exception {
try {
REXPLogical df = (REXPLogical) re.parseAndEval("is.data.frame(" + name + ")");
REXPLogical mat = (REXPLogical) re.parseAndEval("is.matrix(" + name + ")");
// If it is a data.frame...
if (df.isTRUE()[0]) {
return "data.frame";
} else if(mat.isTRUE()[0]) {
return "matrix";
} else {
return "other";
}
}
catch (REngineException | REXPMismatchException e) {
log.error("Error while getting type of output variable");
throw new I2B2Exception("Error delivered from server: Determining data type of output variable");
}
}
// Create HTML table code and a csv file if it is a table-like R type
// Otherwise just return the result value as a string
private String extractResult(String name, String csvPath, String filename) throws I2B2Exception {
String type = getType(name);
if (type.equals("data.frame") || type.equals("matrix")) {
// This is a workaround for a bug in xtable library (Date columns produce an error)
// See http://stackoverflow.com/questions/8652674/r-xtable-and-dates for details
try {
re.voidEval("xtable <- function(x, ...) {\n" +
"for (i in which(sapply(x, function(y) !all(is.na(match(c(\"POSIXt\",\"Date\"),class(y))))))) x[[i]] <- as.character(format(x[[i]], format=\"%Y-%m-%d %H:%M:%S\"))\n" +
"xtable::xtable(x, ...)\n}\n");
}
catch (REngineException e) {
log.error("Error while creating function as xtable workaround.");
throw new I2B2Exception("Error delivered from server: xtable workaround");
}
// Write csv file into the web directory
// This workaround ensures that every DateTime has the same representation in the .csv file
// (without this the time would be ommited if it is midnight)
try {
re.voidEval("girix.tmptable <- as.data.frame(lapply(" + name + ", function(x) if (is(x, \"POSIXt\")) format(x, \"%Y-%m-%d %H:%M:%S\") else x))");
re.voidEval("write.table(girix.tmptable, file = \"" + csvPath + "/" + filename + ".csv\", append = FALSE, quote=which(sapply(" + name + ", function(x) !is.numeric(x) & !is(x, \"POSIXt\")))," +
" sep = \";\", eol = \"\\r\\n\", na = \"NULL\", dec = \",\", row.names = FALSE, col.names = TRUE, qmethod=\"double\", fileEncoding = \"UTF-8\")");
re.voidEval("rm(girix.tmptable)");
}
catch (REngineException e) {
log.error("Error while writing csv file for table " + name);
throw new I2B2Exception("Error delivered from server: Writing csv file");
}
// Now create the HTML code of the table structure
try {
REXP ret = re.parseAndEval("paste(capture.output(print(xtable(" + name + "), type = \"html\")), collapse=\"\")");
return ret.asString();
} catch (REngineException | REXPMismatchException e) {
try {
re.voidEval("write(\"Error while trying to create HTML code out of table " + name + " \n\", stderr())");
} catch (REngineException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
return "undefined";
}
} else {
try {
REXP ret = re.parseAndEval("toString(" + name + ")");
return ret.asString();
}
catch (REngineException | REXPMismatchException e) {
log.error("Error while extracting results (other)");
throw new I2B2Exception("Error delivered from server: Extracting result value as string");
}
}
}
public void doFinalRTasks(String webPath) throws I2B2Exception {
// Create RImage directory if not existing
File f = new File(webPath + "/RImage/");
if ( ! f.exists()) {
if (! f.mkdirs()) {
log.error("Error while creating RImage directory");
throw new I2B2Exception("Error while creating RImage directory");
}
}
// Write plot files, write R workspace image and clear workspace
try {
re.voidEval("dev.off()");
re.voidEval("save.image(file=\"" + webPath + "/RImage/RImage" + "\")");
re.voidEval("rm(list = ls())");
}
catch (REngineException e) {
log.error("Error while doing final tasks");
throw new I2B2Exception("Error delivered from server: Doing final R tasks");
}
// End R thread
re.close();
}
private static REXP getOrEval(REngine rengine, String cmd) throws REngineException, REXPMismatchException {
REXP ret;
try {
ret = rengine.parseAndEval(cmd);
} catch(REngineEvalException e) {
ret = rengine.get(cmd, null, true);
}
return ret;
}
// Following methods are used to access the strings saving R output / error stream
public static void appendROutput(String s) {
Routput.append(s);
}
public static String getROutput() {
return Routput.toString();
}
public static void appendRErrors(String s) {
Rerrors.append(s);
}
public static String getRErrors() {
return Rerrors.toString();
}
}