/*
Copyright (C) 2011 Diego Darriba, David Posada
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package es.uvigo.darwin.jmodeltest.selection;
import java.util.Enumeration;
import es.uvigo.darwin.jmodeltest.ApplicationOptions;
import es.uvigo.darwin.jmodeltest.ModelTest;
import es.uvigo.darwin.jmodeltest.io.TextOutputStream;
import es.uvigo.darwin.jmodeltest.model.Model;
import es.uvigo.darwin.jmodeltest.statistics.Statistics;
import es.uvigo.darwin.jmodeltest.utilities.Utilities;
public class HLRT {
private ApplicationOptions options;
public final double MAX_PROB = 0.999999;
public final double MIN_PROB = 0.000001;
public double alphaLRT;
public TextOutputStream stream = ModelTest.getMainConsole();
public int TPMnumber;
public int TIMnumber;
// constructor
public HLRT(ApplicationOptions options) {
this.options = options;
TPMnumber = 0;
TIMnumber = 0;
}
/****************************
* compute ********************************* * Builds and tests the
* hierarchy of LRTs accordingly to the starting * model and adding or
* removing parameters in a particular order * *
***********************************************************************/
public void compute(boolean forward, double alpha, boolean writePAUPblock) {
int i;
double P;
Model currentModel, nullModel, altModel, competingModel;
String[] hypotheses;
alphaLRT = alpha;
hypotheses = new String[ModelTest.testingOrder.size()];
i = 0;
for (Enumeration<String> e = ModelTest.testingOrder.elements(); e
.hasMoreElements();)
hypotheses[i++] = e.nextElement();
if (ModelTest.buildGUI)
System.out.print("computing hLRT ... ");
if (forward)
currentModel = ModelTest.getCandidateModels()[0];
else
currentModel = ModelTest.getCandidateModels()[options
.getNumModels() - 1];
stream.println("\n\n\n---------------------------------------------------------------");
stream.println("* *");
stream.println("* HIERARCHICAL LIKELIHOO RATIO TESTS (hLRT) *");
stream.println("* *");
stream.println("---------------------------------------------------------------");
if (options.getSubstTypeCode() >= 4) {
stream.println("\nhLRT is not available for the 203 substitution scheme.");
stream.println("Please use one of the other available selection criteria (AIC, BIC, AICc or DT).");
return;
}
if (options.fixedTopology | options.userTopologyExists) {
stream.println("\nSettings: ");
if (forward)
stream.println(" Forward selection (adding parameters)");
else
stream.println(" Backward selection (removing parameters)");
stream.println(" starting model = " + currentModel.getName());
stream.print(" hypotheses order = ");
for (i = 0; i < hypotheses.length; i++) {
stream.print(hypotheses[i]);
if (i < hypotheses.length - 1)
stream.print("-");
}
stream.print("\n Confidence alpha level = ");
stream.printf("%6.4f\n", alphaLRT);
nullModel = null;
altModel = null;
for (i = 0; i < hypotheses.length; i++) {
if (forward)
nullModel = currentModel;
else
altModel = currentModel;
stream.println("\n* Tested hypothesis = " + hypotheses[i]);
competingModel = findCompetingModel(currentModel,
hypotheses[i], forward);
if (competingModel == null) {
stream.println("Cannot be tested. No competing model differs just by "
+ hypotheses[i]
+ " from "
+ currentModel.getName()
+ ".\nRevise the order of hypotheses selected for the LRTs."
+ "\n...Skipping to the next hypothesis");
// System.err.println("hypothesis = " + hypotheses[i] +
// " -- not found best competing model");
continue;
}
// System.err.println("hypothesis = " + hypotheses[i] +
// " -- found best competing model = " + competingModel.name);
if (ApplicationOptions.getInstance().getSubstTypeCode() == 3)
if (competingModel.getName().startsWith("TIM")
|| competingModel.getName().startsWith("TPM"))
competingModel = BestTIMTPM(competingModel);
if (forward)
altModel = competingModel;
else
nullModel = competingModel;
stream.printf("Null model = %-8s", nullModel.getName());
stream.printf("\t-lnL = %6.4f", nullModel.getLnL());
stream.printf("\nAlternative model = %-8s", altModel.getName());
stream.printf("\t-lnL = %6.4f", altModel.getLnL());
stream.println();
if (hypotheses[i].equals("gamma")
|| hypotheses[i].equals("pinv"))
P = LRTboundary(nullModel, altModel);
else
P = LRT(nullModel, altModel);
if (forward) {
if (P < alphaLRT)
currentModel = altModel;
else {
stream.println("\nThe current model could not be rejected");
if (ModelTest.buildGUI)
Utilities.toConsoleEnd();
break;
}
} else {
if (P > alphaLRT)
currentModel = nullModel;
else {
stream.println("\nThe current model rejected the null model");
if (ModelTest.buildGUI)
Utilities.toConsoleEnd();
break;
}
}
if (ModelTest.buildGUI)
Utilities.toConsoleEnd();
}
ModelTest.setMinHLRT(currentModel);
stream.println("\n Model selected: ");
ModelTest.getMinHLRT().print(stream);
// print ML tree for best-fit model
if (ApplicationOptions.getInstance().optimizeMLTopology)
stream.println("\nML tree (NNI) for the best hLRT model = "
+ ModelTest.getMinHLRT().getTreeString());
// print PAUP* block
if (writePAUPblock)
ModelTest
.WritePaupBlock(stream, "hLRT", ModelTest.getMinHLRT());
} else {
stream.println("\nhLRT is only available when likelihoods are calculated on the same tree (i.e., models are nested)");
stream.println("Execute jModelTest using a fixed BIONJ-JC tree or a user-defined topology");
}
if (ModelTest.buildGUI) {
Utilities.toConsoleEnd();
System.out.println("OK");
}
}
/****************************
* computeDynamical ************************ * Builds and tests the
* hierarchy of LRTs in a dynamic fashion -- finding the best step to take
* (biggest increase in lnL) -- * and adding or removing parameters in a
* particular order * *
***********************************************************************/
public void computeDynamical(boolean forward, double alpha,
boolean writePAUPblock) {
int i, j, bestHypothesisIndex;
double P;
Model currentModel, nullModel, altModel, competingModel, bestCompetingModel;
String[] hypotheses, trimmedHypotheses;
boolean thereAreValidHypotheses;
double lnDifference, bestLnDifference;
alphaLRT = alpha;
hypotheses = new String[ModelTest.testingOrder.size()];
i = 0;
for (String s : ModelTest.testingOrder)
hypotheses[i++] = s;
if (ModelTest.buildGUI)
System.out.print("computing dynamical LRTs ... ");
if (forward)
currentModel = ModelTest.getCandidateModels()[0];
else
currentModel = ModelTest.getCandidateModels()[options
.getNumModels() - 1];
stream.println("\n\n\n---------------------------------------------------------------");
stream.println("* *");
stream.println("* DYNAMICAL LIKELIHOO RATIO TESTS (dLRT) *");
stream.println("* *");
stream.println("---------------------------------------------------------------");
stream.println("\nSettings: ");
if (forward)
stream.println(" Forward selection (adding parameters)");
else
stream.println(" Backward selection (removing parameters)");
stream.println(" starting model = " + currentModel.getName());
stream.print(" hypotheses = ");
for (i = 0; i < hypotheses.length; i++) {
stream.print(hypotheses[i]);
if (i < hypotheses.length - 1)
stream.print(", ");
}
stream.print("\n Confidence alpha level = ");
stream.printf("%6.4f\n", alphaLRT);
nullModel = null;
altModel = null;
thereAreValidHypotheses = true;
while (thereAreValidHypotheses) {
bestCompetingModel = null;
bestHypothesisIndex = 0;
if (forward) {
nullModel = currentModel;
bestLnDifference = 0;
} else {
altModel = currentModel;
bestLnDifference = 1000000;
}
// chose best competing model across hypotheses
for (i = 0; i < hypotheses.length; i++) {
lnDifference = 0;
competingModel = findCompetingModel(currentModel,
hypotheses[i], forward);
if (competingModel != null) {
if (ApplicationOptions.getInstance().getSubstTypeCode() == 3)
if (competingModel.getName().startsWith("TIM")
|| competingModel.getName().startsWith("TPM"))
competingModel = BestTIMTPM(competingModel);
lnDifference = Math.abs(currentModel.getLnL()
- competingModel.getLnL());
if ((forward && lnDifference > bestLnDifference)
|| (!forward && lnDifference < bestLnDifference)) {
bestCompetingModel = competingModel;
bestLnDifference = lnDifference;
bestHypothesisIndex = i;
}
}
}
// we should not be here
if (bestCompetingModel == null) {
stream.print("\nNo best competing model for any of the "
+ hypotheses.length + " remaining hypotheses: ");
for (i = 0; i < hypotheses.length; i++) {
stream.print(hypotheses[i]);
if (i < hypotheses.length - 1)
stream.print(" + ");
}
stream.println(", given the current model = "
+ currentModel.getName());
// System.err.println("hypothesis = " +
// hypotheses[bestHypothesisIndex] +
// " -- not found best competing model");
thereAreValidHypotheses = false;
continue;
}
/*
* stream.println("\nBest hypothesis = " +
* hypotheses[bestHypothesisIndex] + "\n current model = " +
* currentModel.name + "\n competing model = " +
* bestCompetingModel.name + "\n best lnL increase = " +
* bestLnDifference);
*/
// System.err.println("hypothesis = " +
// hypotheses[bestHypothesisIndex] +
// " -- found best competing model = " + bestCompetingModel.name);
if (forward)
altModel = bestCompetingModel;
else
nullModel = bestCompetingModel;
stream.println("\nTesting " + hypotheses[bestHypothesisIndex]
+ " hypothesis");
stream.printf("Null model = %-8s", nullModel.getName());
stream.printf("\t-lnL = %6.4f", nullModel.getLnL());
stream.printf("\nAlternative model = %-8s", altModel.getName());
stream.printf("\t-lnL = %6.4f", altModel.getLnL());
stream.println();
// test this best hypothesis
if (hypotheses[bestHypothesisIndex].equals("gamma")
|| hypotheses[bestHypothesisIndex].equals("pinv"))
P = LRTboundary(nullModel, altModel);
else
P = LRT(nullModel, altModel);
if (forward) {
if (P < alphaLRT)
currentModel = altModel;
else {
stream.println("\nThe current model could not be rejected");
if (ModelTest.buildGUI)
Utilities.toConsoleEnd();
break;
}
} else {
if (P > alphaLRT)
currentModel = nullModel;
else {
stream.println("\nThe current model rejected the null model");
if (ModelTest.buildGUI)
Utilities.toConsoleEnd();
break;
}
}
// remove the hypothesis tested
trimmedHypotheses = new String[hypotheses.length - 1];
for (i = j = 0; i < hypotheses.length; i++) {
if (i < bestHypothesisIndex)
trimmedHypotheses[j++] = hypotheses[i].toString();
else if (i > bestHypothesisIndex)
trimmedHypotheses[j++] = hypotheses[i].toString();
}
hypotheses = trimmedHypotheses;
if (hypotheses.length <= 0)
thereAreValidHypotheses = false;
if (ModelTest.buildGUI)
Utilities.toConsoleEnd();
}
ModelTest.setMinDLRT(currentModel);
stream.println("\n Model selected: ");
ModelTest.getMinDLRT().print(stream);
// print PAUP* block
if (writePAUPblock)
ModelTest.WritePaupBlock(stream, "dLRT", ModelTest.getMinDLRT());
if (ModelTest.buildGUI) {
Utilities.toConsoleEnd();
System.out.println("OK");
}
}
/****************************
* findCompetingModel*********************** * Finds the corresponding
* competing model given the current * model and hypothesis that is being
* tested. The null model * has to be nested within the alternative, and
* they can only differ * by one set of parameters * *
***********************************************************************/
public Model findCompetingModel(Model current, String test, boolean adding) {
int distance;
boolean isTest;
Model competing, found;
found = null;
for (Model model : ModelTest.getCandidateModels()) {
competing = model;
isTest = false;
distance = 0;
// if step forward compare only with more complex models
if (adding && current.getK() - competing.getK() >= 0)
continue;
// if step down compare only with simpler models
else if (!adding && current.getK() - competing.getK() <= 0)
continue;
if (current.ispF() != competing.ispF()) {
if (test.equals("freq"))
isTest = true;
distance++;
}
if ((!current.ispT() && competing.ispT())
|| (!competing.ispT() && current.ispT())) {
if (test.equals("titv"))
isTest = true;
if (!current.ispR() && !competing.ispR()) // because we will
// increase this
// distance later
// for models with
// pR
distance++;
}
if (current.ispG() != competing.ispG()) {
if (test.equals("gamma"))
isTest = true;
distance++;
}
if (current.ispI() != competing.ispI()) {
if (test.equals("pinv"))
isTest = true;
distance++;
}
// because JC and GTR have the same pT ...
if (current.ispT() == competing.ispT()
&& current.ispR() != competing.ispR())
isTest = false;
if (ApplicationOptions.getInstance().getSubstTypeCode() == 0) {
if ((Math.abs(competing.getNumTi() - current.getNumTi()) == 1)
&& (Math.abs(competing.getNumTv() - current.getNumTv()) == 3)) {
if (test.equals("2ti4tv"))
isTest = true;
distance++;
}
} else if (((adding) && (current.ispT() || current.ispR()))
|| ((!adding) && (competing.ispT() || competing.ispR()))) {
if (Math.abs(competing.getNumTi() - current.getNumTi()) == 1) {
if (test.equals("2ti"))
isTest = true;
distance++;
}
if (Math.abs(competing.getNumTv() - current.getNumTv()) == 1) {
if (test.equals("2tv"))
isTest = true;
distance++;
} else if (Math.abs(competing.getNumTv() - current.getNumTv()) == 2) {
if (test.equals("4tv"))
isTest = true;
distance++;
} else if (Math.abs(competing.getNumTv() - current.getNumTv()) == 3)
isTest = false;
}
if (isTest && distance == 1) {
found = model;
break;
}
// System.err.println("test = " + test + " current = " +
// current.name + " competing = " + competing.name +
// " isTest = " + isTest + " distance = " + distance);
}
return found;
}
/****************************
* BestTIMTPM ******************************* * Returns the corresponding
* individual model within the families * TPM, TPMuf, TIM and TIMef * *
*************************************************************************/
public Model BestTIMTPM(Model whichModel) {
Model foundModel = null;
String name, model1, model2, model3;
name = whichModel.getName();
model1 = name.replaceFirst("[0-9]", "1");
model2 = name.replaceFirst("[0-9]", "2");
model3 = name.replaceFirst("[0-9]", "3");
// System.err.println ("Find best among " + model1 + " and " + model2 +
// " and " + model3);
foundModel = findBest(model1, model2, model3);
// System.err.println ("Found :" + foundModel.name);
return foundModel;
}
/****************************
* findBest ******************************* * Finds the model with the
* highest likelihood out of three * TIM or TPM given models * *
************************************************************************/
public Model findBest(String modelName1, String modelName2,
String modelName3) {
Model m1, m2, m3;
m1 = m2 = m3 = null;
for (Model model : ModelTest.getCandidateModels()) {
if (modelName1.equals(model.getName()))
m1 = model;
else if (modelName2.equals(model.getName()))
m2 = model;
else if (modelName3.equals(model.getName()))
m3 = model;
}
// check if we already know the familiy number
if (m1.getName().startsWith("TIM") && TIMnumber > 0) {
if (TIMnumber == 1)
return m1;
else if (TIMnumber == 2)
return m2;
else
return m3;
} else if (m1.getName().startsWith("TPM") && TPMnumber > 0) {
if (TPMnumber == 1)
return m1;
else if (TPMnumber == 2)
return m2;
else
return m3;
}
// otherwise we will select it now
stream.println("Selecting first best representative model among:");
stream.println(" " + m1.getName() + " (-lnL = " + m1.getLnL() + ")");
stream.println(" " + m2.getName() + " (-lnL = " + m2.getLnL() + ")");
stream.println(" " + m3.getName() + " (-lnL = " + m3.getLnL() + ")");
if (m1.getLnL() <= m2.getLnL() && m1.getLnL() <= m3.getLnL()) {
if (m1.getName().startsWith("TIM"))
TIMnumber = 1;
else
TPMnumber = 1;
return m1;
} else if (m2.getLnL() <= m1.getLnL() && m2.getLnL() <= m3.getLnL()) {
if (m2.getName().startsWith("TIM"))
TIMnumber = 2;
else
TPMnumber = 2;
return m2;
} else {
if (m3.getName().startsWith("TIM"))
TIMnumber = 3;
else
TPMnumber = 3;
return m3;
}
}
/****************************
* LRT ************************************* * Computes a likelihood ratio
* test between two model and returns * a P-value according to a standard
* chi2 distribution * *
***********************************************************************/
private double LRT(Model model0, Model model1) {
double delta, prob;
int df;
delta = 2 * (model0.getLnL() - model1.getLnL());
df = model1.getK() - model0.getK();
if (delta == 0)
prob = 1.0;
else
prob = Statistics.chiSquareProbability(delta, df);
stream.printf("2(lnL1-lnL0) = %6.4f", delta);
if (prob == 1.0)
stream.println("\tP-value > " + MAX_PROB);
else if (prob < 0.000001)
stream.println("\tP-value < " + MIN_PROB);
else
stream.printf("\tP-value = %6.4f\n", prob);
return prob;
}
/***************************
* LRTboundary******************************* * Computes a likelihood ratio
* test between two model and returns * a P-value according to a mixed chi2
* distribution * *
***********************************************************************/
private double LRTboundary(Model model0, Model model1) {
double delta, prob;
int df;
delta = 2 * (model0.getLnL() - model1.getLnL());
df = model1.getK() - model0.getK();
if (delta == 0)
prob = 1.0;
else {
if (df == 1)
prob = Statistics.chiSquareProbability(delta, df) / 2;
else
prob = (Statistics.chiSquareProbability(delta, df - 1) + Statistics
.chiSquareProbability(delta, df)) / 2;
}
stream.printf("2(lnL1-lnL0) = %6.4f", delta);
if (prob == 1.0)
stream.println("\tP-value > " + MAX_PROB);
else if (prob < 0.000001)
stream.println("\tP-value < " + MIN_PROB);
else
stream.printf("\tP-value = %6.4f\n", prob);
return prob;
}
} // class hLRT