/*
Copyright (C) 2011 Diego Darriba, David Posada
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package es.uvigo.darwin.jmodeltest.utilities;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.PushbackReader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Locale;
import pal.tree.Tree;
import pal.tree.TreeParseException;
import es.uvigo.darwin.jmodeltest.ApplicationOptions;
import es.uvigo.darwin.jmodeltest.ModelTest;
import es.uvigo.darwin.jmodeltest.ModelTestService;
import es.uvigo.darwin.jmodeltest.exe.RunConsense;
import es.uvigo.darwin.jmodeltest.exe.RunPhyml;
import es.uvigo.darwin.jmodeltest.exe.RunPhymlThread;
import es.uvigo.darwin.jmodeltest.io.AlignmentReader;
import es.uvigo.darwin.jmodeltest.io.TextOutputStream;
import es.uvigo.darwin.jmodeltest.model.Model;
import es.uvigo.darwin.jmodeltest.observer.ConsoleProgressObserver;
import es.uvigo.darwin.jmodeltest.selection.AIC;
import es.uvigo.darwin.jmodeltest.selection.AICc;
import es.uvigo.darwin.jmodeltest.selection.BIC;
import es.uvigo.darwin.jmodeltest.selection.DT;
import es.uvigo.darwin.jmodeltest.selection.HLRT;
import es.uvigo.darwin.jmodeltest.selection.InformationCriterion;
import es.uvigo.darwin.jmodeltest.tree.TreeUtilities;
public class Simulation {
private ApplicationOptions options;
private static final double nil = -99999;
private static final String AIC = "AIC";
private static final String AICc = "AICc";
private static final String BIC = "BIC";
private static final String DT = "DT";
private static final String[] IC_TYPES = { AIC, AICc, BIC, DT };
public Simulation(ApplicationOptions options) {
this.options = options;
}
/*************************
* runSimulations ****************************** * Organizes all the tasks
* that the program needs to carry out * * *
***********************************************************************/
public void run() {
int i;
boolean append;
/*
* .trees: base trees for every model in standard orden
*
* .AIC_tre: best tree for the AIC model .AIC_mmi_mtre: model averaged
* AIC tree (50% majority rule) .AIC_mmi_atre: model averaged AIC tree
* (strict) ... same for AICc, BIC and DT
*
* .parms: parameter estimates for every model in standard orden
* .AIC_parms: parameter estimates for the AIC model .AIC_imp_parms:
* parameter importances obtained with the AIC .AIC_mmi_parms: parameter
* importances obtained with the AIC ... same for AICc, BIC and DT
*/
// make simulation directories
String simdir = "sims/";
new File(simdir).mkdir();
String outdir = simdir + "out/";
new File(outdir).mkdir();
String treedir = simdir + "trees/";
new File(treedir).mkdir();
String parmdir = simdir + "parameters/";
new File(parmdir).mkdir();
String simsName = options.simulationsName;
if (options.getInputFile().getName().endsWith("001")
|| options.getInputFile().getName().endsWith("001.phy")
|| options.getInputFile().getName().endsWith("001.dat"))
append = false;
else
append = true;
// outfiles
TextOutputStream outConsole = ModelTest.setMainConsole(new TextOutputStream(
outdir + simsName + ".out", append));
TextOutputStream treeConsole = new TextOutputStream(treedir + simsName
+ ".trees", append);
TextOutputStream parameterConsole = new TextOutputStream(parmdir
+ simsName + ".parms", append);
// print header information
ModelTest.printHeader(outConsole);
// print notice information
// printNotice(outConsole);
// check expiration date
// CheckExpiration (outConsole);
// print the command line
outConsole.print("\nArguments =");
for (i = 0; i < ModelTest.arguments.length; i++)
outConsole.print(" " + ModelTest.arguments[i]);
// open data file
File file = options.getInputFile();
outConsole.print("\n\nReading data file \"" + file.getName()
+ "\"...");
if (file.exists()) {
try {
// File outputFile = File
// .createTempFile("jmodeltest", "input.aln");
String alnStr = ModelTestService.readAlignment(file, options.getAlignmentFile());
PushbackReader pr = new PushbackReader(
new StringReader(alnStr));
options.setAlignment(AlignmentReader.createAlignment(
new PrintWriter(System.err),
pr,
true)); // file
outConsole.println(" OK.");
outConsole.println(" number of sequences: "
+ options.getNumTaxa());
outConsole.println(" number of sites: " + options.getNumSites());
} catch (Exception e)// file cannot be read correctly
{
System.err.println("\nThe specified file \""
+ file.getName()
+ "\" cannot be read as an alignment");
outConsole.println(" failed.\n");
System.exit(0);
}
} else // file does not exist
{
System.err.println("\nThe specified file \""
+ file.getName() + "\" cannot be found");
outConsole.println(" failed.\n");
System.exit(0);
}
// open tree file if necessary
if (options.userTopologyExists) {
File treefile = options.getTreeFile();
options.setInputTreeFile(file);
outConsole.print("Reading tree file \"" + treefile.getName()
+ "\"...");
// read the tree in
Tree tree = null;
try {
tree = TreeUtilities.readTree(treefile.getAbsolutePath());
} catch (IOException e) {
System.err.println("\nThe specified tree file \""
+ treefile.getName() + "\" cannot be found");
outConsole.println(" failed.\n");
System.exit(0);
} catch (TreeParseException e) {
System.err.println("\nThe specified file \""
+ treefile.getName()
+ "\" cannot be read as valid Newick tree");
outConsole.println(" failed.\n");
System.exit(0);
}
if (tree != null) {
options.setUserTree(TreeUtilities.toNewick(tree, true,
false, false));
TextOutputStream out = new TextOutputStream(
options.getTreeFile().getAbsolutePath());
out.print(TreeUtilities.toNewick(tree, true, false, false));
out.close();
outConsole.println(" OK.");
}
}
// print some progress
System.err.print("Doing \"" + file.getName() + "\" ... ");
// calculate number of models
if (options.getSubstTypeCode() == 0)
options.setNumModels(3);
else if (options.getSubstTypeCode() == 1)
options.setNumModels(5);
else if (options.getSubstTypeCode() == 2)
options.setNumModels(7);
else
options.setNumModels(11);
if (options.doF)
options.setNumModels(options.getNumModels() * 2);
if (options.doI && options.doG)
options.setNumModels(options.getNumModels() * 4);
else if (options.doI || options.doG)
options.setNumModels(options.getNumModels() * 2);
// build set of models
options.setCandidateModels();
// calculate likelihoods with phyml in the command line
RunPhyml phymlrun = new RunPhymlThread(new ConsoleProgressObserver(options), options, ModelTest.getCandidateModels());
phymlrun.execute();
// print all model trees to tree file and parameters to parms file
if (!append)
parameterConsole
.println("data\tname\tln\tK\tfA\tfC\tfG\tfT\tkappa\ttitv\trAC\tAG\trAT\trCG\trCT\trGT\tpinvI\tshapeG\tpinvIG\tshapeIG");
for (i = 0; i < options.getNumModels(); i++) {
treeConsole.println(options.getInputFile().getName() + "\t"
+ ModelTest.getCandidateModels()[i].getName() + "\t"
+ ModelTest.getCandidateModels()[i].getTreeString());
printModelLine(
ModelTest.getCandidateModels()[i],
parameterConsole,
options.getInputFile().getName() + "\t"
+ ModelTest.getCandidateModels()[i].getName(), nil, nil);
}
// do AIC if selected
if (options.doAIC) {
// TODO: oquhuh
printSelection("AIC", treedir, simsName, parmdir, outConsole,
append);
}
// do AICc if selected
if (options.doAICc) {
printSelection("AICc", treedir, simsName, parmdir, outConsole,
append);
}
// do BIC if selected
if (options.doBIC) {
printSelection("BIC", treedir, simsName, parmdir, outConsole,
append);
}
// do DT if selected
if (options.doDT) {
printSelection("DT", treedir, simsName, parmdir, outConsole, append);
}
// do hLRT if selected
if (options.doHLRT) {
HLRT myHLRT = new HLRT(options);
myHLRT.compute(!options.backwardHLRTSelection,
options.confidenceLevelHLRT, options.writePAUPblock);
treeConsole.println(ModelTest.averagedTreeString);
}
// do dLRT if selected
if (options.doDLRT) {
HLRT myHLRT = new HLRT(options);
myHLRT.computeDynamical(!options.backwardHLRTSelection,
options.confidenceLevelHLRT, options.writePAUPblock);
}
outConsole.println("\n=> This run has finished.\n");
treeConsole.close();
System.err.println("OK.");
}
/****************************
* printModelLine *************************** * Print model components in a
* tabulated format * *
************************************************************************/
public void printModelLine(Model model, TextOutputStream stream,
String whichName, double score, double weight) {
if (whichName == null)
stream.printf("%-10s", model.getName());
else
stream.printf("%-10s", whichName);
if (model.getLnL() == 0) {
stream.println("\tOPTIMIZATION FAILED!");
System.exit(0);
} else {
stream.printf("\t%.4f", model.getLnL());
stream.printf("\t%d", model.getK());
if (score != nil)
stream.printf("\t%.4f ", score);
if (weight != nil)
stream.printf("\t%.4f ", weight);
if (model.ispF()) {
stream.printf("\t%.4f ", model.getfA());
stream.printf("\t%.4f ", model.getfC());
stream.printf("\t%.4f ", model.getfG());
stream.printf("\t%.4f ", model.getfT());
} else
stream.print("\t-\t-\t-\t-");
if (model.ispT()) {
stream.printf("\t%.4f", model.getKappa());
stream.printf("\t%.4f", model.getTitv());
} else
stream.print("\t-\t-");
if (model.ispR()) {
stream.printf("\t%.4f", model.getRa());
stream.printf("\t%.4f", model.getRb());
stream.printf("\t%.4f", model.getRc());
stream.printf("\t%.4f", model.getRd());
stream.printf("\t%.4f", model.getRe());
stream.printf("\t%.4f", 1.0);
} else
stream.print("\t-\t-\t-\t-\t-\t-");
if (model.ispI() && model.ispG()) {
stream.printf("\t-\t-\t%.4f", model.getPinv());
stream.printf("\t%.4f", model.getShape());
} else if (model.ispI())
stream.printf("\t%.4f\t-\t-\t-", model.getPinv());
else if (model.ispG())
stream.printf("\t-\t%.4f\t-\t-", model.getShape());
else
stream.print("\t-\t-\t-\t-");
stream.println(" ");
}
}
/*************************
* CheckNAsim ********************************* * Returns formatted value or
* NA symbol depending on variable status * * *
***********************************************************************/
public static String CheckNAsim(double value) {
if (value == Utilities.NA)
return "-";
else {
String s = String.format(Locale.ENGLISH, "%.4f", value);
return s;
}
}
public void printSelection(String icType, String treedir, String simsName,
String parmdir, TextOutputStream outConsole, boolean append) {
// Validate
if (!Arrays.asList(IC_TYPES).contains(icType)) {
// TODO: EXCEPTION!
}
TextOutputStream ICtre = new TextOutputStream(treedir + simsName + "."
+ icType + "_tre", append);
TextOutputStream IC_mmi_mtre = new TextOutputStream(treedir + simsName
+ "." + icType + "_mmi_mtre", append);
TextOutputStream IC_mmi_atre = new TextOutputStream(treedir + simsName
+ "." + icType + "_mmi_atre", append);
TextOutputStream IC_parms = new TextOutputStream(parmdir + simsName
+ "." + icType + "_parms", append);
TextOutputStream IC_imp_parms = new TextOutputStream(parmdir + simsName
+ "." + icType + "_imp_parms", append);
TextOutputStream IC_mmi_parms = new TextOutputStream(parmdir + simsName
+ "." + icType + "_mmi_parms", append);
InformationCriterion ic;
if (icType.equals(AIC)) {
ic = new AIC(options.writePAUPblock, options.doImportances,
options.doModelAveraging, options.confidenceInterval);
} else if (icType.equals("AICc")) {
ic = new AICc(options.writePAUPblock, options.doImportances,
options.doModelAveraging, options.confidenceInterval);
} else if (icType.equals("BIC")) {
ic = new BIC(options.writePAUPblock, options.doImportances,
options.doModelAveraging, options.confidenceInterval);
} else if (icType.equals("DT")) {
ic = new DT(options.writePAUPblock, options.doImportances,
options.doModelAveraging, options.confidenceInterval);
} else {
ic = null;
// TODO: EXCEPTION!!!
}
ic.compute();
ic.print(outConsole);
if (icType.equals(AIC)) {
ModelTest.setMyAIC((AIC) ic);
} else if (icType.equals("AICc")) {
ModelTest.setMyAICc((AICc) ic);
} else if (icType.equals("BIC")) {
ModelTest.setMyBIC((BIC) ic);
} else if (icType.equals("DT")) {
ModelTest.setMyDT((DT) ic);
}
ICtre.println(ic.getMinModel().getTreeString());
if (!append)
IC_parms.print("data\tname\tln\tK\tscore\tweigth\tfA\tfC\tfG\tfT\tkappa\ttitv\trAC\tAG\trAT\trCG\trCT\trGT\tpinvI\tshapeG\tpinvIG\tshapeIG\n");
printModelLine(ic.getMinModel(), IC_parms, options.getInputFile().getName()
+ "\t" + ic.getMinModel().getName(), ic.getMinModelValue(),
ic.getMinModelWeight());
if (options.doAveragedPhylogeny) {
new RunConsense(ic, "50% majority rule",
options.confidenceInterval);
IC_mmi_mtre.println(ModelTest.averagedTreeString);
new RunConsense(ic, "strict",
options.confidenceInterval);
IC_mmi_atre.println(ModelTest.averagedTreeString);
}
if (options.doImportances) {
if (!append) {
IC_imp_parms
.print("data\tfA\tfC\tfG\tfT\tkappa\ttitv\trAC\tAG\trAT\trCG\trCT\trGT\tpinvI\tshapeG\tpinvIG\tshapeIG\n");
}
IC_imp_parms.print(options.getInputFile().getName());
IC_imp_parms.printf("\t%.4f", ic.getIfA());
IC_imp_parms.printf("\t%.4f", ic.getIfC());
IC_imp_parms.printf("\t%.4f", ic.getIfG());
IC_imp_parms.printf("\t%.4f", ic.getIfT());
IC_imp_parms.printf("\t%.4f", ic.getIkappa());
IC_imp_parms.printf("\t%.4f", ic.getItitv());
IC_imp_parms.printf("\t%.4f", ic.getiRa());
IC_imp_parms.printf("\t%.4f", ic.getiRb());
IC_imp_parms.printf("\t%.4f", ic.getiRc());
IC_imp_parms.printf("\t%.4f", ic.getiRd());
IC_imp_parms.printf("\t%.4f", ic.getiRe());
IC_imp_parms.printf("\t%.4f", ic.getiRf());
IC_imp_parms.printf("\t%.4f", ic.getIpinvI());
IC_imp_parms.printf("\t%.4f", ic.getIshapeG());
IC_imp_parms.printf("\t%.4f", ic.getIpinvIG());
IC_imp_parms.printf("\t%.4f", ic.getIshapeIG());
IC_imp_parms.println(" ");
}
if (options.doModelAveraging) {
if (!append) {
IC_mmi_parms
.print("data\tfA\tfC\tfG\tfT\tkappa\ttitv\trAC\tAG\trAT\trCG\trCT\trGT\tpinvI\tshapeG\tpinvIG\tshapeIG\n");
}
IC_mmi_parms.print(options.getInputFile().getName());
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getAfA()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getAfC()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getAfG()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getAfT()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getAkappa()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getAtitv()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getaRa()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getaRb()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getaRc()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getaRd()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getaRe()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getaRf()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getApinvI()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getAshapeG()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getApinvIG()));
IC_mmi_parms.printf("\t%s", CheckNAsim(ic.getAshapeIG()));
IC_mmi_parms.println(" ");
}
ICtre.close();
IC_mmi_mtre.close();
IC_mmi_atre.close();
IC_parms.close();
IC_imp_parms.close();
IC_mmi_parms.close();
}
}