/* Copyright (C) 2009 Diego Darriba This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package es.uvigo.darwin.prottest.selection; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Random; import es.uvigo.darwin.prottest.model.Model; import es.uvigo.darwin.prottest.selection.model.SelectionModel; import es.uvigo.darwin.prottest.util.collection.ModelCollection; import es.uvigo.darwin.prottest.util.collection.SingleModelCollection; import java.util.ArrayList; import pal.alignment.Alignment; /** * Generic implementation of an information criterion for model selection. * Model selection is used to model-averaging and estimate parameter importance * in an evolutionary context after computing likelihood values for every model. */ public abstract class InformationCriterion { /** The best model according the concrete criterion. */ protected SelectionModel bestModel; /** The models including some information about the criterion. */ protected List<SelectionModel> selectionModels; /** HashMap to quick find the selection models. */ protected HashMap<Model, SelectionModel> hashModels; /** The confidence interval. */ protected double confidenceInterval; /** The set of models that fall into the confidence interval. */ protected List<SelectionModel> confidenceModels; /** The cumulative weight of the parameter. */ protected double cumWeight; /** The overall alpha value. */ protected double overallAlpha = 0.0; /** The overall invariant value. */ protected double overallInv = 0.0; /** The overall alpha + invariant value. */ protected double overallAlphaInv = 0.0; /** The overall invariant + alpha value. */ protected double overallInvAlpha = 0.0; /** The alpha parameter importance. */ protected double alphaImportance = 0.0; /** The invariant parameter importance. */ protected double invImportance = 0.0; /** The alpha + invariant parameter importance. */ protected double alphaInvImportance = 0.0; /** The +F parameter importance. */ protected double fImportance = 0.0; /** The sample size. */ protected double sampleSize; /** The alignment */ protected Alignment alignment; private boolean existInvModels = false, existGammaModels = false, existGammaInvModels = false, existFModels = false; public boolean isExistFModels() { return existFModels; } public boolean isExistGammaInvModels() { return existGammaInvModels; } public boolean isExistGammaModels() { return existGammaModels; } public boolean isExistInvModels() { return existInvModels; } /** * Gets the overall alpha value. * * @return the overall alpha value */ public double getOverallAlpha() { return overallAlpha; } /** * Gets the overall invariant value. * * @return the overall invariant value */ public double getOverallInv() { return overallInv; } /** * Gets the overall alpha + invariant value. * * @return the overall alpha + invariant value */ public double getOverallAlphaInv() { return overallAlphaInv; } /** * Gets the overall invariant + alpha value. * * @return the overall invariant + alpha value */ public double getOverallInvAlpha() { return overallInvAlpha; } /** * Gets the alpha parameter importance. * * @return the alpha parameter importance */ public double getAlphaImportance() { return alphaImportance; } /** * Gets the invariant parameter importance. * * @return the invariant parameter importance */ public double getInvImportance() { return invImportance; } /** * Gets the alpha + invariant parameter importance. * * @return the alpha + invariant parameter importance */ public double getAlphaInvImportance() { return alphaInvImportance; } /** * Gets the +F parameter importance. * * @return the +F parameter importance */ public double getFImportance() { return fImportance; } /** * Gets the sample size. * * @return the sample size */ public double getSampleSize() { return sampleSize; } /** * Gets the confidence interval value * * @return the confidence interval */ public double getConfidenceInterval() { return confidenceInterval; } /** * Gets the list of selection models. * * @return the list of selection models */ public List<SelectionModel> getSelectionModels() { return selectionModels; } /** * Instantiates a new model selection criterion. * * @param models the models to compute * @param confidenceInterval the confidence interval * @param sampleSize the sample size if different of the default */ public InformationCriterion(ModelCollection models, double confidenceInterval, double sampleSize) { this.alignment = models.getAlignment(); this.sampleSize = sampleSize; int numberOfModels = models.size(); this.confidenceInterval = confidenceInterval; this.hashModels = new HashMap<Model, SelectionModel>(numberOfModels); this.confidenceModels = new ArrayList<SelectionModel>(); this.selectionModels = getSelectionModels(models); Collections.sort(selectionModels); for (Model model : models) { if (!existInvModels && model.isInv() && !model.isGamma()) { existInvModels = true; } else if (!existGammaModels && !model.isInv() && model.isGamma()) { existGammaModels = true; } else if (!existGammaInvModels && model.isInv() && model.isGamma()) { existGammaInvModels = true; } else if (!existFModels && model.isPlusF()) { existFModels = true; } } compute(); } /** * Gets the list of selection models. * * @param models the models to calculate the information criterion * * @return the list of models having the information about the concrete criterion */ protected abstract List<SelectionModel> getSelectionModels(List<Model> models); /** * Computes the delta criterion, the weight and the cumulative weight of the criterion * over the model collection. */ private void compute() { double minValue, sumExp, cum; bestModel = selectionModels.get(0); minValue = bestModel.getValue(); // Calculate differences sumExp = 0; for (SelectionModel model : selectionModels) { model.setDeltaValue( model.getValue() - minValue); sumExp += Math.exp(-0.5 * model.getDeltaValue()); } // Calculate weights for (SelectionModel model : selectionModels) { if (model.getDeltaValue() > 1000) { model.setWeightValue(0.0); } else { model.setWeightValue( Math.exp(-0.5 * model.getDeltaValue()) / sumExp); } } // Calculate cumulative weights, overalls and parameters importance cum = 0.0; alphaImportance = 0.0; invImportance = 0.0; alphaInvImportance = 0.0; fImportance = 0.0; overallAlpha = 0.0; overallInv = 0.0; overallAlphaInv = 0.0; overallInvAlpha = 0.0; for (SelectionModel model : selectionModels) { cum += model.getWeightValue(); model.setCumulativeWeightValue(cum); if (model.getModel().isGamma() && model.getModel().isInv()) { alphaInvImportance += model.getWeightValue(); overallAlphaInv += model.getWeightValue() * model.getModel().getAlpha(); overallInvAlpha += model.getWeightValue() * model.getModel().getInv(); } else if (model.getModel().isGamma()) { alphaImportance += model.getWeightValue(); overallAlpha += model.getWeightValue() * model.getModel().getAlpha(); } else if (model.getModel().isInv()) { invImportance += model.getWeightValue(); overallInv += model.getWeightValue() * model.getModel().getInv(); } if (model.getModel().isPlusF()) { fImportance += model.getWeightValue(); } } overallAlpha /= alphaImportance; overallInv /= invImportance; overallInvAlpha /= alphaInvImportance; overallAlphaInv /= alphaInvImportance; // confidence interval buildConfidenceInterval(); } /** * Builds the confidence interval of selected models * and their cumulative weight * * The model that just passed the confidence will be or not * in the interval by chance (see below). */ public void buildConfidenceInterval() { confidenceModels = new ArrayList<SelectionModel>(); // first construct the confidence interval for models if (confidenceInterval >= 1.0) { for (SelectionModel model : selectionModels) { model.setInConfidenceInterval(true); confidenceModels.add(model); } cumWeight = 1.0; } else { cumWeight = 0.0; SelectionModel lastModel = null; for (SelectionModel model : selectionModels) { if (model.getCumulativeWeightValue() <= confidenceInterval) { confidenceModels.add(model); cumWeight += model.getWeightValue(); } else { lastModel = model; break; } } // lets decide whether the model that just passed the confidence // interval should be included (suggested by John Huelsenbeck) double probOut = (lastModel.getCumulativeWeightValue() - confidenceInterval) / lastModel.getWeightValue(); double probIn = 1.0 - probOut; Random generator = new Random(); double randomNumber = generator.nextDouble(); if (randomNumber <= probIn) { lastModel.setInConfidenceInterval(true); confidenceModels.add(lastModel); cumWeight += lastModel.getWeightValue(); } else { lastModel.setInConfidenceInterval(false); } } } /** * All iterator. * * @return the iterator< selection model> */ public Iterator<SelectionModel> allIterator() { return selectionModels.iterator(); } /** * Confidence collection. * * @return the collection of confidence selection models */ public Collection<SelectionModel> getConfidenceModels() { return confidenceModels; } /** * Gets the model collection. * * @return the model collection */ public ModelCollection getModelCollection() { ModelCollection models = new SingleModelCollection(alignment); for (SelectionModel model : confidenceModels) { models.add(model.getModel()); } return models; } /** * Gets the SelectionModel in an specific position of the * collection, sorted by this criterion value. * * @param index the index * * @return the SelectionModel */ public SelectionModel get(int index) { return selectionModels.get(index); } /** * Gets the SelectionModel filled with this criterion data that * wraps the specified model. * @see SelectionModel * * @param model the model contained in the SelectionModel * * @return the SelectionModel that wraps the model * */ public SelectionModel get(Model model) { return hashModels.get(model); } /** * Gets the best model according with the concrete criterion. * (i.e. the model with the lowest criterion value) * @see SelectionModel#getValue() * * @return the best model */ public Model getBestModel() { return bestModel.getModel(); } /** * Gets the criterion name. * * @return the criterion name */ public abstract String getCriterionName(); }