/*
Copyright (C) 2009 Diego Darriba
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package es.uvigo.darwin.prottest.exe;
import static es.uvigo.darwin.prottest.global.ApplicationGlobals.APPLICATION_PROPERTIES;
import static es.uvigo.darwin.prottest.global.ProtTestConstants.OPTIMIZE_BIONJ;
import static es.uvigo.darwin.prottest.global.ProtTestConstants.OPTIMIZE_FIXED_BIONJ;
import static es.uvigo.darwin.prottest.global.ProtTestConstants.OPTIMIZE_ML;
import static es.uvigo.darwin.prottest.global.ProtTestConstants.OPTIMIZE_USER;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import pal.tree.ReadTree;
import pal.tree.TreeParseException;
import es.uvigo.darwin.prottest.exe.util.TemporaryFileManager;
import es.uvigo.darwin.prottest.global.ApplicationGlobals;
import es.uvigo.darwin.prottest.global.options.ApplicationOptions;
import es.uvigo.darwin.prottest.model.AminoAcidModel;
import es.uvigo.darwin.prottest.model.Model;
import es.uvigo.darwin.prottest.util.Utilities;
import es.uvigo.darwin.prottest.util.exception.ModelOptimizationException;
import es.uvigo.darwin.prottest.util.exception.ModelOptimizationException.ModelNotFoundException;
import es.uvigo.darwin.prottest.util.exception.ModelOptimizationException.OSNotSupportedException;
import es.uvigo.darwin.prottest.util.exception.ModelOptimizationException.PhyMLExecutionException;
import es.uvigo.darwin.prottest.util.exception.ModelOptimizationException.StatsFileFormatException;
import es.uvigo.darwin.prottest.util.exception.ProtTestInternalException;
import es.uvigo.darwin.prottest.util.exception.TreeFormatException;
/**
* The Class PhyMLAminoAcidRunEstimator. It optimizes Amino-Acid
* model parameters using PhyML 3.0.
*
* @author Federico Abascal
* @author Diego Darriba
* @since 3.0
*/
public class PhyMLv3AminoAcidRunEstimator extends AminoAcidRunEstimator {
/** The PhyML implemented matrices. */
public static String[] IMPLEMENTED_MATRICES = {
"JTT",
"LG",
"DCMut",
"MtREV",
"MtMam",
"MtArt",
"Dayhoff",
"WAG",
"RtREV",
"CpREV",
"Blosum62",
"VT",
"HIVb",
"HIVw"
};
/** Suffix for temporary statistic files. */
private static final String STATS_FILE_SUFFIX = "_phyml_stats";
/** Suffix for temporary tree files. */
private static final String TREE_FILE_SUFFIX = "_phyml_tree";
/** Alignment filename. */
private String workAlignment;
/** Custom model substitution matrix file**/
private File modelFile;
public static File phymlBinary;
public static String phymlBinaryStr;
private File statsFile;
private File treeFile;
public static boolean checkBinary() {
boolean canExecute = false;
if (!PhyMLv3AminoAcidRunEstimator.phymlBinary.exists()) {
System.err.println("ERROR: PhyML binary not found: " +
PhyMLv3AminoAcidRunEstimator.phymlBinary.getAbsolutePath() + "\n");
} else if (!PhyMLv3AminoAcidRunEstimator.phymlBinary.canExecute()) {
System.err.println("ERROR: PhyML binary exists, but it cannot be executed: " +
PhyMLv3AminoAcidRunEstimator.phymlBinary.getAbsolutePath() + "\n");
} else {
canExecute = true;
}
return canExecute;
}
static {
boolean phymlGlobal = APPLICATION_PROPERTIES.getProperty("global-phyml-exe", "false").equalsIgnoreCase("true");
File exeFilesDir = new File(APPLICATION_PROPERTIES.getProperty("exe-dir", ApplicationGlobals.PATH));
if (!exeFilesDir.isAbsolute()) {
exeFilesDir = new File(ApplicationGlobals.PATH + File.separator + APPLICATION_PROPERTIES.getProperty("exe-dir", "bin"));
}
phymlBinary = new File(exeFilesDir.getAbsolutePath() + File.separator + "phyml");
if (phymlGlobal) {
String[] pathList = System.getenv("PATH").split(":");
for (String path : pathList)
{
phymlBinary = new File(path + "/phyml");
if (phymlBinary.exists() && phymlBinary.canExecute()) {
phymlBinaryStr = phymlBinary.getAbsolutePath();
break;
}
}
} else {
if (phymlBinary.exists() && phymlBinary.canExecute()) {
phymlBinaryStr = phymlBinary.getAbsolutePath();
} else {
phymlBinaryStr = exeFilesDir.getAbsolutePath() + File.separator + getPhymlVersion();
}
phymlBinary = new File(phymlBinaryStr);
}
}
/**
* Instantiates a new optimizer for amino-acid models
* using PhyML.
*
* @param options the application options instance
* @param model the amino-acid model to optimize
*/
public PhyMLv3AminoAcidRunEstimator(ApplicationOptions options, Model model) {
this(options, model, 1);
}
/**
* Instantiates a new optimizer for amino-acid models
* using PhyML.
*
* @param options the application options instance
* @param model the amino-acid model to optimize
* @param numberOfThreads the number of threads to use in the optimization
*/
public PhyMLv3AminoAcidRunEstimator(ApplicationOptions options, Model model, int numberOfThreads) {
super(options, model, numberOfThreads);
this.numberOfCategories = options.ncat;
try {
this.model = (AminoAcidModel) model;
// check if there is any matrix file
modelFile = new File(ApplicationGlobals.PATH + File.separator + "models" + File.separator + model.getMatrix());
} catch (ClassCastException cce) {
throw new ProtTestInternalException("Wrong model type");
}
}
/* (non-Javadoc)
* @see es.uvigo.darwin.prottest.exe.RunEstimator#optimizeModel(es.uvigo.darwin.prottest.global.options.ApplicationOptions)
*/
@Override
public boolean runEstimator()
throws ModelOptimizationException {
//let's call Phyml with the proper command line
this.workAlignment = TemporaryFileManager.getInstance().getAlignmentFilename(Thread.currentThread());
String inv = "0.0";
if (model.isInv()) {
inv = "e";
}
String rateCathegories = "1";
String alpha = "";
if (model.isGamma()) {
rateCathegories = "" + model.getNumberOfTransitionCategories();//options.ncat;
alpha = "e";
}
String tr = "BIONJ";
String F = "d";
if (model.isPlusF()) {
F = "e";
}
String topo = "lr";
switch (options.strategyMode) {
case OPTIMIZE_BIONJ:
tr = "BIONJ";
topo = "lr";
break;
case OPTIMIZE_FIXED_BIONJ:
if (TemporaryFileManager.getInstance().getTreeFilename(Thread.currentThread()) != null) {
tr = TemporaryFileManager.getInstance().getTreeFilename(Thread.currentThread());
topo = "lr";
} else {
topo = "r";
}
break;
case OPTIMIZE_ML:
topo = "tlr";
break;
case OPTIMIZE_USER:
tr = TemporaryFileManager.getInstance().getTreeFilename(Thread.currentThread());
topo = "lr";
}
try {
Runtime runtime = Runtime.getRuntime();
String str[] = new String[31];
for (int i = 0; i < str.length; i++) {
str[i] = "";
}
boolean phymlGlobal = APPLICATION_PROPERTIES.getProperty("global-phyml-exe", "false").equalsIgnoreCase("true");
File exeFilesDir = new File(APPLICATION_PROPERTIES.getProperty("exe-dir", ApplicationGlobals.PATH));
if (!exeFilesDir.isAbsolute()) {
exeFilesDir = new File(ApplicationGlobals.PATH + File.separator + APPLICATION_PROPERTIES.getProperty("exe-dir", "bin"));
}
File phymlBin;
phymlBin = new File(exeFilesDir.getAbsolutePath() + File.separator + "phyml");
String phymlBinName;
if (phymlGlobal) {
phymlBinName = "phyml";
} else {
if (phymlBin.exists() && phymlBin.canExecute()) {
phymlBinName = phymlBin.getAbsolutePath();
} else {
phymlBinName = exeFilesDir.getAbsolutePath() + File.separator + getPhymlVersion();
}
}
if (phymlBinName != null) {
// phyml -i seq_file_name -d aa ¿-q? -f d/e (d para -F y e para +F) -v 0/e (para -I o +I) -a e (para estimar alpha)
// -c 0/4/8 (num rate categories) -u user_tree_file (opcional)
// -o tlr/lr (dependiendo de si optimizamos la topología o no)
// -m WAG (default) | JTT | MtREV | Dayhoff | DCMut | RtREV | CpREV | VT | Blosum62 | MtMam | MtArt | HIVw | HIVb | custom
str[0] = phymlBinName;
// input alignment
str[4] = "-i";
str[5] = workAlignment;
// number of rate categories
str[6] = "-c";
str[7] = rateCathegories;
// the model
str[8] = "-m";
if (!Arrays.asList(IMPLEMENTED_MATRICES).contains(model.getMatrix())) {
// check matrix file
if (!modelFile.exists()) {
throw new ModelNotFoundException(model.getMatrix());
}
str[9] = "custom";
str[27] = "--aa_rate_file";
str[28] = modelFile.getAbsolutePath();
} else {
str[9] = model.getMatrix();
}
// proportion of invariable sites
str[10] = "-v";
str[11] = inv;
// value of the gamma shape parameter (if gamma distribution)
if (!alpha.equals("")) {
str[12] = "-a";
str[13] = alpha;
}
// topology optimization
str[14] = "-o";
str[15] = topo;
// amino-acid frequencies
str[16] = "-f";
str[17] = F;
// starting tree file
if (!tr.equals("BIONJ")) {
str[18] = "-u";
str[19] = tr;
}
// data type
str[20] = "-d";
str[21] = "aa";
// bootstrapping
str[22] = "-b";
str[23] = "0";
if (APPLICATION_PROPERTIES.getProperty("phyml_thread_scheduling", "false").equalsIgnoreCase("true")) {
str[24] = "--num_threads";
str[25] = String.valueOf(numberOfThreads);
}
if (APPLICATION_PROPERTIES.getProperty("no-memory-check", "no").equalsIgnoreCase("yes")) {
str[26] = "--no_memory_check";
}
if (options.strategyMode == OPTIMIZE_ML) {
str[29] = "-s";
str[30] = options.getTreeSearchOperation();
} else {
str[29] = str[30] = "";
}
model.setCommandLine(str);
proc = runtime.exec(str);
proc.getOutputStream().write(modelFile.getPath().getBytes());
ExternalExecutionManager.getInstance().addProcess(proc);
} else {
OSNotSupportedException e =
new OSNotSupportedException("PhyML");
throw e;
}
proc.getOutputStream().close();
StreamGobbler errorGobbler = new PhymlStreamGobbler(new InputStreamReader(proc.getErrorStream()), "Phyml-Error", true, RunEstimator.class);
StreamGobbler outputGobbler = new PhymlStreamGobbler(new InputStreamReader(proc.getInputStream()), "Phyml-Output", true, RunEstimator.class);
errorGobbler.start();
outputGobbler.start();
int exitVal = proc.waitFor();
ExternalExecutionManager.getInstance().removeProcess(proc);
pfine("Phyml's command-line: ");
for (int i = 0; i < str.length; i++) {
pfine(str[i] + " ");
}
pfineln("");
if (exitVal != 0) {
errorln("Phyml's exit value: " + exitVal + " (there was probably some error)");
error("Phyml's command-line: ");
for (int i = 0; i < str.length; i++) {
error(str[i] + " ");
}
errorln("");
errorln("Please, take a look at the Phyml's log below:");
String line;
try {
FileReader input = new FileReader(TemporaryFileManager.getInstance().getLogFilename(Thread.currentThread()));
BufferedReader br = new BufferedReader(input);
while ((line = br.readLine()) != null) {
errorln(line);
}
} catch (IOException e) {
errorln("Unable to read the log file: " + TemporaryFileManager.getInstance().getLogFilename(Thread.currentThread()));
}
}
} catch (InterruptedException e) {
throw new PhyMLExecutionException("Interrupted execution: " + e.getMessage());
} catch (IOException e) {
throw new PhyMLExecutionException("I/O error: " + e.getMessage());
}
statsFile = new File(workAlignment + STATS_FILE_SUFFIX);
if (!(statsFile.exists() && statsFile.canRead()))
{
/* try with txt suffix */
statsFile = new File(workAlignment + STATS_FILE_SUFFIX + ".txt");
if (!(statsFile.exists() && statsFile.canRead()))
throw new StatsFileFormatException("PhyML",
"Stats file does not exist. Please check if PhyML is working.");
}
statsFile.deleteOnExit();
treeFile = new File(workAlignment + TREE_FILE_SUFFIX);
if (!(treeFile.exists() && treeFile.canRead()))
{
/* try with txt suffix */
treeFile = new File(workAlignment + TREE_FILE_SUFFIX + ".txt");
if (!(treeFile.exists() && treeFile.canRead()))
throw new TreeFormatException(
"Tree file does not exist. Please check if PhyML is working correctly.");
}
treeFile.deleteOnExit();
if (!(readStatsFile() && readTreeFile())) {
return false;
}
return true;
}
/**
* Read the temporary statistics file.
*
* @return true, if successful
*/
private boolean readStatsFile()
throws ModelOptimizationException {
String line;
try {
FileReader input = new FileReader(statsFile.getAbsolutePath());
BufferedReader br = new BufferedReader(input);
while ((line = br.readLine()) != null) {
pfinerln("[DEBUG] PHYML: " + line);
if (line.length() > 0) {
if (line.startsWith(". Model of amino acids substitution")) {
String matrixName = Utilities.lastToken(line);
//TODO: This line exists due to a bug in the phyml latest versions
// where the HIVw matrix is shown as HIVb in the stats file
if (!model.getMatrix().equals("HIVw"))
if (!(model.getMatrix().equals(matrixName) || (modelFile.exists() &&
!Arrays.asList(IMPLEMENTED_MATRICES).contains(model.getMatrix())))) {
String errorMsg = "Matrix names doesn't match";
errorln("PHYML: " + line);
errorln("Last token: " + Utilities.lastToken(line));
errorln("It should be: " + model.getMatrix());
errorln(errorMsg);
throw new ModelNotFoundException(model.getMatrix());
}
} else if (line.startsWith(". Log-likelihood")) {
model.setLk(Double.parseDouble(Utilities.lastToken(line)));
} else if (line.startsWith(". Discrete gamma model")) {
if (Utilities.lastToken(line).equals("Yes")) {
line = br.readLine();
pfinerln("[DEBUG] PHYML: " + line);
if (model.getNumberOfTransitionCategories() != Integer.parseInt(Utilities.lastToken(line))) {
String errorMsg = "There were errors in the number of transition categories: " + model.getNumberOfTransitionCategories() + " vs " + Integer.parseInt(Utilities.lastToken(line));
errorln(errorMsg);
throw new StatsFileFormatException("PhyML", errorMsg);
//prottest.setCurrentModel(-2);
}
line = br.readLine();
pfinerln("[DEBUG] PHYML: " + line);
model.setAlpha(Double.parseDouble(Utilities.lastToken(line)));
}
} else if (line.startsWith(". Proportion of invariant:")) {
model.setInv(Double.parseDouble(Utilities.lastToken(line)));
} else if (line.startsWith(". Time used")) {
time = Utilities.lastToken(line);
}
}
}
} catch (IOException e) {
throw new StatsFileFormatException("PhyML", e.getMessage());
}
return true;
}
/**
* Read the temporary tree file.
*
* @return true, if successful
*
* @throws TreeFormatException the tree format exception
*/
private boolean readTreeFile()
throws TreeFormatException {
try {
model.setTree(new ReadTree(treeFile.getAbsolutePath()));
} catch (TreeParseException e) {
String errorMsg = "ProtTest: wrong tree format, exiting...";
throw new TreeFormatException(errorMsg);
} catch (IOException e) {
String errorMsg = "Error: File not found (IO error), exiting...";
throw new TreeFormatException(errorMsg);
}
return true;
}
/**
* Delete temporary files.
*
* @return true, if successful
*/
@Override
protected boolean deleteTemporaryFiles() {
File f = new File(TemporaryFileManager.getInstance().getLogFilename(Thread.currentThread()));
return f.delete();
}
/**
* Gets the PhyML executable name for the current Operating System.
*
* @return the PhyML executable name
*/
private static String getPhymlVersion() {
String os = System.getProperty("os.name");
String oa = System.getProperty("os.arch");
if (os.startsWith("Mac")) {
if (oa.startsWith("ppc")) {
return null;
} else {
return "PhyML_3.0_macOS_i386";
}
} else if (os.startsWith("Linux")) {
if (oa.contains("md64")) {
return "PhyML_3.0_linux64";
} else {
return "PhyML_3.0_linux32";
}
} else if (os.startsWith("Window")) {
return "PhyML_3.0_win32.exe";
} else {
return null;
}
}
}