/* $RCSfile$ * $Author$ * $Date$ * $Revision$ * * Copyright (C) 2004-2008 Rajarshi Guha <rajarshi.guha@gmail.com> * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */ package org.openscience.cdk.qsar.model.R2; import java.awt.FileDialog; import java.awt.Frame; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.InputStreamReader; import java.io.StringWriter; import java.util.HashMap; import java.util.Iterator; import java.util.Random; import java.util.Set; import org.openscience.cdk.qsar.model.IModel; import org.openscience.cdk.qsar.model.QSARModelException; import org.openscience.cdk.tools.ILoggingTool; import org.openscience.cdk.tools.LoggingToolFactory; import org.rosuda.JRI.REXP; import org.rosuda.JRI.RList; import org.rosuda.JRI.RMainLoopCallbacks; import org.rosuda.JRI.Rengine; /** * Base class for the R-CDK interface. * <p/> * This class provides the basis for all classes that wish to interface with * R functions from a CDK program. * <p/> * Since the R engine is multi-threaded only one instance of the R session can exist * for a given Java process. This implies that initialization must be perfored exactly once * within a Java process. This class ensure that this occurs. * <p/> * In addition, this class loads some helper functions into the R session. The loading * can be via a temporary file (the default) or via a String, which may be useful in * webservice scenarios. * <p/> * <b>Requirement</b> The class (and implementing subclasses) is dependent on the * <a href="http://rosuda.org/JRI/">JRI</a> library. This provides an interface to R * for Java code. Though the <a href="http://rosuda.org/rJava/">rJava</a> for R * includes JRI, the code here is only dependent on JRI and does not attempt to * go from R to Java. Hence rJava is not a requirement. To compile this code, the CDK * includes the JRI jar file. However to run the code, the JRI native library (libjri.so * on Linux) must be located in the users LD_LIBRARY_PATH. Also the versions of the JRI Java * API and native library should match and this is checked for. * <p/> * Currently the CDK uses JRI 0.3 (available from <a href=" http://rosuda.org/R/nightly/other/JRI_0.3-0.tar.gz">here</a>) * <p/> * <p/> * <b>Implementation Notes</b> * <ul> * <li>If the user requires other initializations the only way to do so at * this point is to edit <code>helper.R</code> or perform the initialization by hand * <li>An implementing class must call <code>super()</code> * <li>Though this class provides a field to store the R model object as a * <code>RList</code> the actual R variable will remain in the R session. This is useful * for saving the model as a .Rda file at one point. Also by storing the model on the R * side we do not not need to make repeated queries on the model via <code>eval()</code>. * <li>Subclasses of this class are generally Java front-ends to a specific R model type * (such as linear regression, CNN etc.). Thus each subclass should provide getter methods * for the various components of such an object. Since this is tedious to do by hand, * you can use the <code>stubs.R</code> script that comes with the CDK distribution to * generate source code for the getter methods for the individual components of an R model * object. Note, that the script currently ignores objects of classes <code>'call'</code> * and <code>'formula'</code>. * </ul> * <p/> * <b>NOTE</b>: For the R backend to work, ensure that R is correctly installed. * Other requirements are * <ul> * <li>LD_LIBRARY_PATH should include the directory that contains <code>libjri.so</code> as well * as the dierctory that contains <code>libR.so</code> * <li>R_HOME should be set to the appropriate location * </ul> * * @author Rajarshi Guha * @cdk.require r-project * @cdk.require JRI.jar * @cdk.module qsar * @cdk.githash * @cdk.keyword R * @cdk.keyword JRI */ public abstract class RModel implements IModel { private String modelName = null; protected RList modelObject = null; protected HashMap params = null; /** * The object that performs the calls to the R engine. */ protected static Rengine rengine = null; /** * A boolean that indicates whether the R/Java subsystem has been initialized or not. */ private static boolean doneInit = false; private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(RModel.class); private void checkEnvironmentVariables() throws QSARModelException { String rhome = System.getenv("R_HOME"); String ldlibrarypath = System.getenv("LD_LIBRARY_PATH"); if (rhome == null || rhome.length() == 0 || ldlibrarypath == null || ldlibrarypath.length() == 0) { throw new QSARModelException( "Cannot find R: R_HOME and LD_LIBRARY_PATH are not set." ); } } private void initRengine(String[] args, boolean useDisk) throws QSARModelException { if (!doneInit) { rengine = new Rengine(args, false, new TextConsole()); if (!rengine.waitForR()) { throw new QSARModelException("Could not load rJava"); } else { logger.debug("Started R"); } doneInit = true; if (useDisk) { loadRFunctions(rengine); logger.info("Initializing from disk"); } else { loadRFunctionsAsStrings(rengine); logger.info("Initializing from strings"); } logger.info("rJava initialized"); } else { logger.info("rjava already intialized"); } } private void loadRFunctions(Rengine engine) { // File.separator is used to be system independent // Fix me: After creating a jar file it don't work on a windwos OS // but within eclipse it won't work on while working with '/' on windows OS // No idea how to solve this // String scriptLocator = "org" + File.separator + "openscience" + // File.separator + "cdk" + File.separator + "qsar" + File.separator + // "model" + File.separator + "data" + File.separator + "helper.R"; String scriptLocator = "org/openscience/cdk/qsar/model/data/helper.R"; try { File scriptFile = File.createTempFile("XXXXX", ".R"); scriptFile.deleteOnExit(); InputStreamReader reader = new InputStreamReader( this.getClass().getClassLoader().getResourceAsStream(scriptLocator)); BufferedReader inFile = new BufferedReader(reader); FileWriter outFile = new FileWriter(scriptFile); BufferedWriter outBuffer = new BufferedWriter(outFile); String inputLine; while ((inputLine = inFile.readLine()) != null) { outBuffer.write(inputLine, 0, inputLine.length()); outBuffer.newLine(); } outBuffer.close(); inFile.close(); outFile.close(); // Necessary for windows user, R needs a '/' in the path of a file even on windows String path = scriptFile.getAbsolutePath(); path = path.replaceAll("\\\\", "/"); engine.eval("source(\"" + path + "\")"); } catch (Exception exception) { logger.error("Could not load helper R script for JRI: ", scriptLocator); logger.debug(exception); } } private void loadRFunctionsAsStrings(Rengine evaluator) { String[] scripts = { "helper.R", }; String scriptPrefix = "org/openscience/cdk/qsar/model/data/"; for (int i = 0; i < scripts.length; i++) { String scriptLocator = scriptPrefix + scripts[i]; try { InputStreamReader reader = new InputStreamReader( this.getClass().getClassLoader().getResourceAsStream(scriptLocator)); BufferedReader inFile = new BufferedReader(reader); StringWriter sw = new StringWriter(); String inputLine; while ((inputLine = inFile.readLine()) != null) { sw.write(inputLine); sw.write("\n"); } sw.close(); evaluator.eval("eval(parse(text=\"" + sw.toString() + "\"))"); } catch (Exception exception) { logger.error("Could not load CDK-rJava R scripts: ", scriptLocator); logger.debug(exception); } } } /** * Initializes R with the <i>--vanilla, --quiet, --slave</i> flags. * <p/> * This constructor will initialize the R session via a temporary file or * from a String depending on whether the symbol <code>initRFromString</code> * is specified on the command line */ public RModel() throws QSARModelException { checkEnvironmentVariables(); // check that the JRI jar and .so match if (!Rengine.versionCheck()) { logger.debug("API version of the JRI library does not match that of the native binary"); throw new QSARModelException("API version of the JRI library does not match that of the native binary"); } params = new HashMap(); String[] args = {"--vanilla", "--quiet", "--slave"}; String initRFromString = System.getProperty("initRFromString"); boolean useDisk = true; if (initRFromString != null && initRFromString.equals("true")) { useDisk = false; } initRengine(args, useDisk); } /** * Saves a R model to disk. * <p/> * This function can be used to save models built in a session, and then loaded * again in a different session. * * @param modelName The name of the model as returned by \code{getModelName}. * @param fileName The file to which the model should be saved * @throws QSARModelException if the R session cannot save the model * @see #loadModel */ public void saveModel(String modelName, String fileName) throws QSARModelException { if (fileName == null || fileName.equals("")) { fileName = modelName + ".rda"; } rengine.assign("tmpModelName", modelName); rengine.assign("tmpFileName", fileName); REXP result = rengine.eval("saveModel(tmpModelName, tmpFileName)"); if (result == null) { logger.debug("Error in 'saveModel(tmpModelName, tmpFileName)'"); throw new QSARModelException("Error saving model"); } } /** * Get the name of the model. * <p/> * This function returns the name of the variable that the actual * model is stored in within the R session. In general this is * not used for the end user. In the future this might be changed * to a private method. * * @return A String containing the name of the R variable * @see #setModelName */ public String getModelName() { return (this.modelName); } /** * Set the name of the model. * <p/> * Ordinarily the user does not need to call this function as each model * is assigned a unique ID at instantiation. However, if a user saves a model * to disk and then later loads it, the loaded * model may overwrite a model in that session. In this situation, this method * can be used to assign a name to the model. * * @param newName The name of the model * @see #getModelName * @see #saveModel * @see #loadModel */ public void setModelName(String newName) { if (this.modelName != null && this.modelName.equals(newName)) return; String oldName = this.modelName; if (oldName != null) { rengine.eval("if ('" + oldName + "' %in% ls()) {" + newName + "<-" + oldName + ";rm(" + oldName + ")}"); } this.modelName = newName; } /** * Get the instance of the <code>Rengine</code>. * <p/> * In case the R engine has not been initialized, it is initialized before * returning the object. * * @return The Rengine object */ public static Rengine getRengine() { return rengine; } /** * Get the actual model object. * * @return An <code>RList</code> object representation of the model. */ public RList getModelObject() { return modelObject; } /** * Get a unique String value. * <p/> * This method can be used to get unique variable names for use in an R session. The * String is generated from a combination of the prefix, the system time and a random * portion. * * @param prefix Any value. If empty or null, <code>"var"</code> is used. * @return A unique String value */ public String getUniqueVariableName(String prefix) { if (prefix == null || prefix.equals("")) prefix = "var"; Random rnd = new Random(); long uid = ((System.currentTimeMillis() >>> 16) << 16) + rnd.nextLong(); return prefix + String.valueOf(Math.abs(uid)).trim(); } /** * Loads the parameters for a model into a <code>list</code> object in the R session. * <p/> * The method assigns the list to a (relatively) unique variable name and returns * the variable name to the caller so that the list can be accessed later on. * * @return * @throws QSARModelException if there are any problems within the R session. */ protected String loadParametersIntoRSession() throws QSARModelException { REXP result; Set keys = params.keySet(); String paramVariableName = getUniqueVariableName("param"); for (Iterator iterator = keys.iterator(); iterator.hasNext();) { String name = (String) iterator.next(); Object value = params.get(name); if (value instanceof Integer) { logger.debug("Assigning a Integer"); Integer tmp1 = (Integer) value; int[] tmp2 = new int[]{tmp1.intValue()}; rengine.assign(name, tmp2); } else if (value instanceof String) { logger.debug("Assigning a String"); rengine.assign(name, (String) value); } else if (value instanceof Boolean) { logger.debug("Assigning a Boolean"); Boolean tmp1 = (Boolean) value; if (tmp1.booleanValue()) result = rengine.eval(name + "<- TRUE"); else result = rengine.eval(name + "<- FALSE"); if (result == null) throw new QSARModelException("Error assigning a boolean"); } else if (value instanceof Double) { logger.debug("Assigning a Double"); Double tmp1 = (Double) value; double[] tmp2 = new double[]{tmp1.doubleValue()}; rengine.assign(name, tmp2); } else if (value instanceof Integer[]) { logger.debug("Assigning a Integer[]"); Integer[] tmp1 = (Integer[]) value; int[] tmp2 = new int[tmp1.length]; for (int i = 0; i < tmp1.length; i++) tmp2[i] = tmp1[i].intValue(); rengine.assign(name, tmp2); } else if (value instanceof Double[]) { logger.debug("Assigning a Double[]"); Double[] tmp1 = (Double[]) value; double[] tmp2 = new double[tmp1.length]; for (int i = 0; i < tmp1.length; i++) tmp2[i] = tmp1[i].doubleValue(); rengine.assign(name, tmp2); } else if (value instanceof Integer[][]) { logger.debug("Assigning a Integer[][]"); Integer[][] tmp1 = (Integer[][]) value; int nrow = tmp1.length; int ncol = tmp1[0].length; int[] tmp2 = new int[nrow * ncol]; for (int i = 0; i < ncol; i++) { for (int j = 0; j < nrow; j++) { tmp2[i * nrow + j] = (tmp1[j][i]).intValue(); } } rengine.assign(name, tmp2); result = rengine.eval(name + "<- matrix(" + name + ", nrow=" + nrow + ")"); if (result == null) throw new QSARModelException("Error assigning a int[][]"); } else if (value instanceof Double[][]) { logger.debug("Assigning a Double[][]"); Double[][] tmp1 = (Double[][]) value; int nrow = tmp1.length; int ncol = tmp1[0].length; double[] tmp2 = new double[nrow * ncol]; for (int i = 0; i < ncol; i++) { for (int j = 0; j < nrow; j++) { tmp2[i * nrow + j] = (tmp1[j][i]).doubleValue(); } } rengine.assign(name, tmp2); result = rengine.eval(name + "<- matrix(" + name + ", nrow=" + nrow + ")"); if (result == null) throw new QSARModelException("Error assigning a double[][]"); } } // make the list command String cmd = paramVariableName + " <- list("; for (Iterator iterator = keys.iterator(); iterator.hasNext();) { String name = (String) iterator.next(); cmd = cmd + name + " = " + name + ", "; } cmd = cmd + ")"; // now eval the command result = rengine.eval(cmd); if (result == null) throw new QSARModelException("Error making the parameter list"); // now lets remove all the variables we had assigned for (Iterator iterator = keys.iterator(); iterator.hasNext();) { String name = (String) iterator.next(); rengine.eval("rm(" + name + ")"); } return paramVariableName; } /** * Checks whether the class of a named object is of the specified class. * <p/> * * @param objectName The name of the R variable holding the object to check * @param objectClass The class to check for * @return true if the object is of the specified class, false if the object is not * of the specified class or the R command to obtain the class failed */ public boolean isOfClass(String objectName, String objectClass) { REXP klass = rengine.eval("class(" + objectName + ")"); if (klass == null) { return false; } return klass.asString().equals(objectClass); } /** * Removes an object from the R session. * * @param objectName The name of the R variable to remove * @throws QSARModelException if the <code>'rm'</code> command failed */ public void removeObject(String objectName) throws QSARModelException { REXP ret = rengine.eval("rm(\"" + objectName + "\")"); if (ret == null) throw new QSARModelException("Error removing \'" + objectName + "\'"); } /** * Abstract method to handle loading R models. * <p/> * This method can be used to load a previously saved R model object. Since * the user can save any arbitrary R object, checks must be made that the * object being returned is an instance of one of the current modeling classes. * <p/> * This is best achieved by forcing each modeling class to write its own loader. * * @param fileName The file containing the R object to load * @throws org.openscience.cdk.qsar.model.QSARModelException * if the R session could not load the object or if the loaded model * does not correspond to the class that it was loaded from * @see #saveModel */ abstract public void loadModel(String fileName) throws QSARModelException; /** * Abstract method to handle loading R models that were previously serialized. * <p/> * This method can be used to load a previously serialized R model object (usinging * serialize()). Since * the user can save any arbitrary R object, checks must be made that the * object being returned is an instance of one of the current modeling classes. * This is best achieved by forcing each modeling class to write its own loader. * <p/> * In addition * objects saved using serialize() do not have a name. As a result a name for the object must * be specified when using this method. * * @param serializedModel A String containing the ASCII sreialized R object * @param modelName The name of the model. (Within the R session, the model will be assigned to * a variable of this name) * @throws QSARModelException if the R session could not load the object or if the loaded model * does not correspond to the class that it was loaded from * @see #saveModel */ abstract public void loadModel(String serializedModel, String modelName) throws QSARModelException; /** * Specifies the parameters value. * * @param key A String representing the name of the parameter (corresponding to the * name described in the R manpages) * @param obj The value of the parameter * @throws QSARModelException if the parameters are of the wrong type for the given modeling function */ abstract public void setParameters(String key, Object obj) throws QSARModelException; abstract public void build() throws QSARModelException; abstract public void predict() throws QSARModelException; protected void finalize() { rengine.eval("rm(\"" + getModelName() + "\",pos=1)"); } ; class TextConsole implements RMainLoopCallbacks { public void rWriteConsole(Rengine re, String text) { System.out.print(text); } public void rBusy(Rengine re, int which) { System.out.println("rBusy(" + which + ")"); } public String rReadConsole(Rengine re, String prompt, int addToHistory) { System.out.print(prompt); try { BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); String s = br.readLine(); return (s == null || s.length() == 0) ? s : s + "\n"; } catch (Exception e) { System.out.println("jriReadConsole exception: " + e.getMessage()); } return null; } public void rShowMessage(Rengine re, String message) { System.out.println("rShowMessage \"" + message + "\""); } public String rChooseFile(Rengine re, int newFile) { FileDialog fd = new FileDialog(new Frame(), (newFile == 0) ? "Select a file" : "Select a new file", (newFile == 0) ? FileDialog.LOAD : FileDialog.SAVE); fd.pack(); fd.setVisible(true); String res = null; if (fd.getDirectory() != null) res = fd.getDirectory(); if (fd.getFile() != null) res = (res == null) ? fd.getFile() : (res + fd.getFile()); return res; } public void rFlushConsole(Rengine re) { } public void rLoadHistory(Rengine re, String filename) { } public void rSaveHistory(Rengine re, String filename) { } public void rWriteConsole(Rengine arg0, String message, int arg2) { System.out.println("rShowMessage \"" + message + "\""); } } }