/* Copyright (C) 2011 Diego Darriba, David Posada This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package es.uvigo.darwin.jmodeltest.selection; import java.util.ArrayList; import java.util.List; import es.uvigo.darwin.jmodeltest.ApplicationOptions; import es.uvigo.darwin.jmodeltest.ModelTest; import es.uvigo.darwin.jmodeltest.exe.PhymlSingleModel; import es.uvigo.darwin.jmodeltest.io.TextOutputStream; import es.uvigo.darwin.jmodeltest.model.Model; import es.uvigo.darwin.jmodeltest.utilities.Utilities; public abstract class InformationCriterion { protected ApplicationOptions options = ApplicationOptions.getInstance(); public static final int IC_AIC = 1; public static final int IC_AICc = 2; public static final int IC_BIC = 3; public static final int IC_DT = 4; private static final String[] names = { "", "AIC", "AICc", "BIC", "DT" }; public int[] order; protected int numModels; protected Model[] models; protected boolean writePAUPblock; protected boolean doImportances; protected boolean doModelAveraging; protected double confidenceInterval; protected List<Model> confidenceModels; protected double cumWeight; protected Model minModel; protected boolean doCheckAgainstULK; protected Model unconstrainedModel; // importances protected double ifA, ifG, ifC, ifT; protected double ititv, ikappa, ipinvI, ishapeG, ipinvIG, ishapeIG; protected double iRa, iRb, iRc, iRd, iRe, iRf; // averaged estimates protected double afA, afG, afC, afT; protected double atitv, akappa, apinvI, ashapeG, apinvIG, ashapeIG; protected double aRa, aRb, aRc, aRd, aRe, aRf; protected boolean validResults; public List<Model> getConfidenceModels() { return confidenceModels; } public InformationCriterion(boolean mwritePAUPblock, boolean mdoImportances, boolean mdoModelAveraging, double minterval) { List<Model> modelList = new ArrayList<Model>(); for (Model model : ModelTest.getCandidateModels()) { if (model.getLnL() > 0) { modelList.add(model); } } models = modelList.toArray(new Model[0]); numModels = models.length; order = new int[numModels]; writePAUPblock = mwritePAUPblock; doImportances = mdoImportances; doModelAveraging = mdoModelAveraging; confidenceInterval = minterval; confidenceModels = new ArrayList<Model>(); validResults = true; doCheckAgainstULK = (getType() != IC_DT && (options.getNumPatterns() > 0) && (options .getUnconstrainedLnL() > 1e-10)); if (doCheckAgainstULK) { unconstrainedModel = new Model(numModels, "UModel", "-", options.getNumPatterns() - 1); unconstrainedModel.setLnL(options.getUnconstrainedLnL()); } } /************************************************************** * parameterImportance * * Calculates the importance for each parameter * * Assumes model estimates rate parametes Ra-Re when they are different to * Rf * * Note: Modeltest assumed TrN y TIM estimate only Rb, Re K81 estimates no R * parameter TVM estimates only Ra, Rc, Rd GTR y SIM estimate Ra, Rb, Rc, * Rd, Re * * Importance is rescaled by the total weight of the models included in the * confidence interval **************************************************************/ public void parameterImportance() { double weight; String partition; ifA = ifG = ifC = ifT = 0; ititv = iRa = iRb = iRc = iRd = iRe = iRf = 0; ipinvI = ishapeG = ipinvIG = ishapeIG = 0; for (Model tmodel : confidenceModels) { weight = getWeight(tmodel) / cumWeight; partition = tmodel.getPartition(); /* base frequencies */ if (tmodel.ispF()) { ifA += weight; ifC += weight; ifG += weight; ifT += weight; } /* substitution rates */ if (tmodel.ispT()) { ikappa += weight; ititv += weight; } else if (tmodel.ispR()) { if (partition.charAt(0) != partition.charAt(5)) iRa += weight; if (partition.charAt(1) != partition.charAt(5)) iRb += weight; if (partition.charAt(2) != partition.charAt(5)) iRc += weight; if (partition.charAt(3) != partition.charAt(5)) iRd += weight; if (partition.charAt(4) != partition.charAt(5)) iRe += weight; iRf += weight; } /* rate variation */ if (tmodel.ispI() && !tmodel.ispG()) { ipinvI += weight; } else if (!tmodel.ispI() && tmodel.ispG()) { ishapeG += weight; } else if (tmodel.ispI() && tmodel.ispG()) { ipinvIG += weight; ishapeIG += weight; } } } /************************************************************ * averageModels * * Calculates the model averaged estimates * * Assumes K81,TrN,TIM,TVMSIM, GRT estimate all rate parametes Ra-Rf * * for a given parameter, weight is rescaled by parameter importance ***********************************************************/ public void averageModels() { double weight, minWeight, NA; String partition; afA = afG = afC = afT = 0; atitv = akappa = aRa = aRb = aRc = aRd = aRe = aRf = 0; apinvI = ashapeG = apinvIG = ashapeIG = 0; NA = Utilities.NA; minWeight = getWeight(models[order[numModels - 1]]); for (Model tmodel : confidenceModels) { weight = getWeight(tmodel) / cumWeight; // because importance is // already reescaled by // cumWeight partition = tmodel.getPartition(); /* base frequencies */ if (tmodel.ispF()) { if (ifA >= minWeight) afA += tmodel.getfA() * weight / ifA; if (ifC >= minWeight) afC += tmodel.getfC() * weight / ifC; if (ifG >= minWeight) afG += tmodel.getfG() * weight / ifG; if (ifT >= minWeight) afT += tmodel.getfT() * weight / ifT; } /* substitution rates */ if (tmodel.ispT()) { if (ikappa >= minWeight) akappa += tmodel.getKappa() * weight / ikappa; if (ititv >= minWeight) atitv += tmodel.getTitv() * weight / ititv; } else if (tmodel.ispR()) { if (partition.charAt(0) != partition.charAt(5) && iRa >= minWeight) aRa += tmodel.getRa() * weight / iRa; if (partition.charAt(1) != partition.charAt(5) && iRb >= minWeight) aRb += tmodel.getRb() * weight / iRb; if (partition.charAt(2) != partition.charAt(5) && iRc >= minWeight) aRc += tmodel.getRc() * weight / iRc; if (partition.charAt(3) != partition.charAt(5) && iRd >= minWeight) aRd += tmodel.getRd() * weight / iRd; if (partition.charAt(4) != partition.charAt(5) && iRe >= minWeight) aRe += tmodel.getRe() * weight / iRe; if (iRf >= minWeight) aRf += tmodel.getRf() * weight / iRf; } /* rate variation */ if (!tmodel.ispI() && tmodel.ispG()) { if (ishapeG > minWeight) ashapeG += tmodel.getShape() * weight / ishapeG; } else if (tmodel.ispI() && !tmodel.ispG()) { if (ipinvI > minWeight) apinvI += tmodel.getPinv() * weight / ipinvI; } else if (tmodel.ispI() && tmodel.ispG()) { if (ishapeIG > minWeight) ashapeIG += tmodel.getShape() * weight / ishapeIG; if (ipinvIG > minWeight) apinvIG += tmodel.getPinv() * weight / ipinvIG; } } // make NA estimates when importance is zero (almost) if (ifA < minWeight) afA = NA; if (ifC < minWeight) afC = NA; if (ifG < minWeight) afG = NA; if (ifT < minWeight) afT = NA; if (ikappa < minWeight) akappa = NA; if (ititv < minWeight) atitv = NA; if (iRa < minWeight) aRa = NA; if (iRb < minWeight) aRb = NA; if (iRc < minWeight) aRc = NA; if (iRd < minWeight) aRd = NA; if (iRe < minWeight) aRe = NA; if (iRf < minWeight) aRf = NA; if (ishapeG < minWeight) ashapeG = NA; if (ipinvI < minWeight) apinvI = NA; if (ishapeIG < minWeight) ashapeIG = NA; if (ipinvIG < minWeight) apinvIG = NA; } public double getAfA() { return afA; } public double getIfA() { return ifA; } public double getIfG() { return ifG; } public double getIshapeIG() { return ishapeIG; } public double getIpinvIG() { return ipinvIG; } public double getIshapeG() { return ishapeG; } public double getIpinvI() { return ipinvI; } public double getiRf() { return iRf; } public double getiRe() { return iRe; } public double getiRd() { return iRd; } public double getiRc() { return iRc; } public double getiRb() { return iRb; } public double getiRa() { return iRa; } public double getIkappa() { return ikappa; } public double getItitv() { return ititv; } public double getIfT() { return ifT; } public double getIfC() { return ifC; } public double getAfG() { return afG; } public double getAshapeIG() { return ashapeIG; } public double getApinvIG() { return apinvIG; } public double getAshapeG() { return ashapeG; } public double getApinvI() { return apinvI; } public double getaRf() { return aRf; } public double getaRe() { return aRe; } public double getaRd() { return aRd; } public double getaRc() { return aRc; } public double getaRb() { return aRb; } public double getaRa() { return aRa; } public double getAkappa() { return akappa; } public double getAtitv() { return atitv; } public double getAfT() { return afT; } public double getAfC() { return afC; } public Model getMinModel() { return minModel; } @Override public String toString() { switch (getType()) { case IC_AIC: return "AIC"; case IC_BIC: return "BIC"; case IC_AICc: return "AICc"; case IC_DT: return "DT"; } return null; } public Model getModel(int i) { return models[order[i]]; } public void print(TextOutputStream stream) { int i, j; String criterion = names[getType()]; printHeader(stream); stream.println(" "); stream.println(" Model selected: "); getMinModel().print(stream); // print PAUP* block if (writePAUPblock) ModelTest.WritePaupBlock(stream, criterion, minModel); // print ML tree for best-fit model stream.println(" "); stream.println("Tree for the best " + criterion + " model = " + minModel.getTreeString()); // update unconstrained model if (unconstrainedModel != null) { unconstrainedModel.setLnL(minModel.getUnconstrainedLnL()); } // print weights stream.println(" "); stream.println(" "); stream.println("* " + criterion + " MODEL SELECTION : Selection uncertainty"); stream.println(" "); stream.printf( "Model -lnL K %4s delta weight cumWeight", criterion); if (doCheckAgainstULK && minModel.getUnconstrainedLnL() > 1e-10) { stream.println(" uDelta"); } else { stream.println(""); } stream.print("-------------------------------------------------------------------------"); if (doCheckAgainstULK && minModel.getUnconstrainedLnL() > 1e-10) { stream.print("-------------"); } for (i = 0; i < numModels; i++) { j = order[i]; // j = i; stream.println(" "); stream.printf("%-10s", models[j].getName()); stream.printf(" %10.5f", models[j].getLnL()); if (options.countBLasParameters) stream.printf(" %2d", models[j].getK()); else stream.printf(" %2d", models[j].getK() - options.getNumBranches()); stream.printf(" %10.6f", getValue(models[j])); stream.printf(" %9.6f", getDelta(models[j])); if (getWeight(models[j]) > 0.0001) stream.printf(" %9.6f", getWeight(models[j])); else stream.printf(" %4.2e", getWeight(models[j])); stream.printf(" %7.6f", getCumWeight(models[j])); if (doCheckAgainstULK && minModel.getUnconstrainedLnL() > 1e-10) { if (!options.isAmbiguous()) { stream.printf(" %10.4f", getUDelta(models[j])); } else if (i == 0) { stream.printf(" %10.4f", setUDelta(models[j])); } else { stream.print(" -"); } } } stream.print("\n-------------------------------------------------------------------------"); if (doCheckAgainstULK && minModel.getUnconstrainedLnL() > 1e-10) { stream.print("-------------"); } stream.println(""); printFooter(stream); Utilities.toConsoleEnd(); // indicate table availability if (ModelTest.buildGUI) { stream.println("\nModel selection results also available at the \"Model > Show model table\" menu"); Utilities.toConsoleEnd(); } // print confidence set stream.println(" "); stream.println(" "); stream.println("* " + criterion + " MODEL SELECTION : Confidence interval"); stream.println(" "); stream.print("There are " + confidenceModels.size() + " models in the "); stream.printf("%.0f% ", confidenceInterval * 100); stream.print("confidence interval: [ "); for (Model m : confidenceModels) { stream.print(m.getName() + " "); } stream.println("] "); // print parameter importances if (doImportances) { stream.println(" "); stream.println(" "); stream.println("* " + criterion + " MODEL SELECTION : Parameter importance"); stream.println(" "); stream.println("Parameter Importance"); stream.print("----------------------"); if (options.doF) { stream.printf("\nfA\t%10.4f", ifA); stream.printf("\nfC\t%10.4f", ifC); stream.printf("\nfG\t%10.4f", ifG); stream.printf("\nfT\t%10.4f", ifT); } stream.printf("\nkappa\t%10.4f", ikappa); stream.printf("\ntitv\t%10.4f", ititv); stream.printf("\nrAC\t%10.4f", iRa); stream.printf("\nrAG\t%10.4f", iRb); stream.printf("\nrAT\t%10.4f", iRc); stream.printf("\nrCG\t%10.4f", iRd); stream.printf("\nrCT\t%10.4f", iRe); stream.printf("\nrGT\t%10.4f", iRf); if (options.doI) stream.printf("\npinv(I)\t%10.4f", ipinvI); if (options.doG) stream.printf("\nalpha(G)\t%10.4f", ishapeG); if (options.doI && options.doG) { stream.printf("\npinv(IG)\t%10.4f", ipinvIG); stream.printf("\nalpha(IG)\t%10.4f", ishapeIG); } stream.println("\n----------------------"); stream.println("Values have been rounded."); stream.println(" (I): considers only +I models."); stream.println(" (G): considers only +G models."); stream.println(" (IG): considers only +I+G models."); } // print model averaging if (doModelAveraging) { stream.println(" "); stream.println(" "); stream.println("* " + criterion + " MODEL SELECTION : Model averaged estimates"); stream.println(" "); stream.println(" Model-averaged"); stream.println("Parameter estimates"); stream.print("-------------------------"); if (options.doF) { stream.printf("\nfA\t%13s", Utilities.checkNA(afA)); stream.printf("\nfC\t%13s", Utilities.checkNA(afC)); stream.printf("\nfG\t%13s", Utilities.checkNA(afG)); stream.printf("\nfT\t%13s", Utilities.checkNA(afT)); } stream.printf("\nkappa\t%13s", Utilities.checkNA(akappa)); stream.printf("\ntitv\t%13s", Utilities.checkNA(atitv)); stream.printf("\nrAC\t%13s", Utilities.checkNA(aRa)); stream.printf("\nrAG\t%13s", Utilities.checkNA(aRb)); stream.printf("\nrAT\t%13s", Utilities.checkNA(aRc)); stream.printf("\nrCG\t%13s", Utilities.checkNA(aRd)); stream.printf("\nrCT\t%13s", Utilities.checkNA(aRe)); stream.printf("\nrGT\t%13s", Utilities.checkNA(aRf)); if (options.doI) stream.printf("\npinv(I)\t%13s", Utilities.checkNA(apinvI)); if (options.doG) stream.printf("\nalpha(G)\t%13s", Utilities.checkNA(ashapeG)); if (options.doI && options.doG) { stream.printf("\npinv(IG)\t%13s", Utilities.checkNA(apinvIG)); stream.printf("\nalpha(IG)\t%13s", Utilities.checkNA(ashapeIG)); } stream.println("\n-------------------------"); stream.println("Numbers have been rounded."); stream.println(" (I): considers only +I models."); stream.println(" (G): considers only +G models."); stream.println(" (IG): considers only +I+G models."); } stream.println(" "); stream.println(" "); stream.println("* " + criterion + " MODEL SELECTION : Best Model's command line"); stream.println(" "); stream.println("phyml " + PhymlSingleModel.writePhyml3CommandLine(getMinModel(), false, options, false, -1)); if (getValue(minModel) < 0.0) { stream.println(" "); stream.println("WARNING! Criterion has zero or negative values. Please check whether your sample size is big enough compared to the number of parameters (K)"); } } public int getNumModels() { return numModels; } public boolean isValid() { return validResults; } public abstract void compute(); public abstract double computeSingle(Model model); // public abstract void print (TextOutputStream stream); public abstract void buildConfidenceInterval(); public abstract double getMinModelValue(); public abstract double getMinModelWeight(); public abstract double getValue(Model m); public abstract double getDelta(Model m); public abstract double getUDelta(Model m); public abstract double setUDelta(Model m); public abstract double getWeight(Model m); public abstract double getCumWeight(Model m); protected abstract void printHeader(TextOutputStream stream); protected abstract void printFooter(TextOutputStream stream); public abstract int getType(); }