/* * PatternList.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.evolution.alignment; import dr.evolution.datatype.DataType; import dr.evolution.util.TaxonList; import dr.util.Identifiable; /** * interface for any list of patterns with weights. * * @author Andrew Rambaut * @version $Id: PatternList.java,v 1.12 2005/05/24 20:25:55 rambaut Exp $ */ public interface PatternList extends TaxonList, Identifiable { /** * @return number of patterns */ int getPatternCount(); /** * @return number of states for this PatternList */ int getStateCount(); /** * Gets the length of the pattern strings which will usually be the * same as the number of taxa * * @return the length of patterns */ int getPatternLength(); /** * Gets the pattern as an array of state numbers (one per sequence) * * @param patternIndex the index of the pattern to return * @return the site pattern at patternIndex */ int[] getPattern(int patternIndex); /** * @param taxonIndex the taxon * @param patternIndex the pattern * @return state at (taxonIndex, patternIndex) */ int getPatternState(int taxonIndex, int patternIndex); /** * Gets the weight of a site pattern * * @param patternIndex the pattern * @return the weight of the specified pattern */ double getPatternWeight(int patternIndex); /** * @return the array of pattern weights */ double[] getPatternWeights(); /** * @return the DataType of this PatternList */ DataType getDataType(); /** * @return the frequency of each state */ double[] getStateFrequencies(); /** * Are the patterns only the unique ones (i.e., compressed)? * @return are unique? */ boolean areUnique(); /** * Helper routines for pattern lists. */ public static class Utils { /** * Returns a double array containing the empirically estimated frequencies * for the states of patternList. This currently calls the version that maps * to PAUP's estimates. * * @param patternList the pattern list to calculate the empirical state * frequencies from * @return the empirical state frequencies of the given pattern list */ public static double[] empiricalStateFrequencies(PatternList patternList) { return empiricalStateFrequenciesPAUP(patternList); } /** * Returns a double array containing the empirically estimated frequencies * for the states of patternList. This version of the routine should match * the values produced by PAUP. * * @param patternList the pattern list to calculate the empirical state * frequencies from * @return the empirical state frequencies of the given pattern list */ public static double[] empiricalStateFrequenciesPAUP(PatternList patternList) { int i, j, k; double total, sum, x, w, difference; DataType dataType = patternList.getDataType(); int stateCount = patternList.getStateCount(); int patternLength = patternList.getPatternLength(); int patternCount = patternList.getPatternCount(); double[] freqs = equalStateFrequencies(patternList); double[] tempFreq = new double[stateCount]; int[] pattern; boolean[] state; int count = 0; do { for (i = 0; i < stateCount; i++) tempFreq[i] = 0.0; total = 0.0; for (i = 0; i < patternCount; i++) { pattern = patternList.getPattern(i); w = patternList.getPatternWeight(i); for (k = 0; k < patternLength; k++) { state = dataType.getStateSet(pattern[k]); sum = 0.0; for (j = 0; j < stateCount; j++) if (state[j]) sum += freqs[j]; for (j = 0; j < stateCount; j++) { if (state[j]) { x = (freqs[j] * w) / sum; tempFreq[j] += x; total += x; } } } } difference = 0.0; for (i = 0; i < stateCount; i++) { difference += Math.abs((tempFreq[i] / total) - freqs[i]); freqs[i] = tempFreq[i] / total; } count ++; } while (difference > 1E-8 && count < 1000); return freqs; } /** * Returns a double array containing the empirically estimated frequencies * for the states of patternList. This version of the routine should match * the values produced by MrBayes. * * @param patternList the pattern list to calculate the empirical state * frequencies from * @return the empirical state frequencies of the given pattern list */ public static double[] empiricalStateFrequenciesMrBayes(PatternList patternList) { DataType dataType = patternList.getDataType(); int stateCount = patternList.getStateCount(); int patternLength = patternList.getPatternLength(); int patternCount = patternList.getPatternCount(); double[] freqs = equalStateFrequencies(patternList); double sumTotal = 0.0; double[] sumFreq = new double[stateCount]; for (int i = 0; i < patternCount; i++) { int[] pattern = patternList.getPattern(i); double w = patternList.getPatternWeight(i); for (int k = 0; k < patternLength; k++) { boolean[] state = dataType.getStateSet(pattern[k]); double sum = 0.0; for (int j = 0; j < stateCount; j++) { if (state[j]) { sum += freqs[j]; } } for (int j = 0; j < stateCount; j++) { if (state[j]) { double x = (freqs[j] * w) / sum; sumFreq[j] += x; sumTotal += x; } } } } for (int i = 0; i < stateCount; i++) { freqs[i] = sumFreq[i] / sumTotal; } return freqs; } /** * Returns a double array containing the equal frequencies * for the states of patternList. * * @param patternList the pattern list * @return return equal state frequencies based on the data type of * the patternlist */ public static double[] equalStateFrequencies(PatternList patternList) { int i, n = patternList.getStateCount(); double[] freqs = new double[n]; double f = 1.0 / n; for (i = 0; i < n; i++) freqs[i] = f; return freqs; } } }