/* * Patterns.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.evolution.alignment; import dr.evolution.datatype.DataType; import dr.evolution.util.Taxon; import dr.evolution.util.TaxonList; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; /** * A concrete implementation of PatternList. Patterns can be added and * removed from the list individually or in bulk from an alignment. 
*
 * <p>Identical patterns are stored once: adding a pattern that is already
 * present increases the weight of the existing entry instead of creating a
 * new one.
 *
 * @author Andrew Rambaut
 * @author Alexei Drummond
 * @version $Id: Patterns.java,v 1.10 2005/07/08 11:27:53 rambaut Exp $
 */
public class Patterns implements PatternList {

    /**
     * Initial capacity of the pattern/weight arrays, and the number of slots
     * by which they grow each time they fill up.
     */
    public static final int COUNT_INCREMENT = 100;

    /**
     * number of patterns
     */
    protected int patternCount = 0;

    /**
     * length of patterns
     */
    protected int patternLength = 0;

    /**
     * weights of each pattern; only the first {@code patternCount} entries are in use
     */
    protected double[] weights = new double[COUNT_INCREMENT];

    /**
     * site patterns [pattern][taxon]; only the first {@code patternCount} entries are in use
     */
    protected int[][] patterns = new int[COUNT_INCREMENT][];

    protected DataType dataType = null;

    protected TaxonList taxonList = null;

    /**
     * Identifier of this pattern list (see {@link #getId()} / {@link #setId(String)}).
     */
    protected String id = null;

    /**
     * Indices that have been masked via {@link #addMask(int)}. Being a set, an
     * index is stored at most once.
     */
    protected Set<Integer> maskSet = new HashSet<Integer>();

    /**
     * Value reported by {@link #areUnique()}; overwritten with the source's
     * {@code areUnique()} each time patterns are added in bulk.
     */
    private boolean areUnique = true;

    /**
     * Creates an empty pattern list for the given data type.
     */
    public Patterns(DataType dataType) {
        this.dataType = dataType;
    }

    /**
     * Creates an empty pattern list for the given data type and taxa. The
     * pattern length is fixed to the number of taxa in {@code taxonList}.
     */
    public Patterns(DataType dataType, TaxonList taxonList) {
        this.dataType = dataType;
        this.taxonList = taxonList;
        patternLength = taxonList.getTaxonCount();
    }

    /**
     * Creates a pattern list from every site of the given site list.
     */
    public Patterns(SiteList siteList) {
        addPatterns(siteList, 0, 0, 1);
    }

    /**
     * Creates a pattern list from every site of each of the given site lists.
     */
    public Patterns(List<SiteList> siteLists) {
        for (SiteList siteList : siteLists) {
            addPatterns(siteList, 0, 0, 1);
        }
    }

    /**
     * Creates a pattern list from a range of sites; see
     * {@link #addPatterns(SiteList, int, int, int)} for the meaning of the range arguments.
     */
    public Patterns(SiteList siteList, int from, int to, int every) {
        addPatterns(siteList, from, to, every);
    }

    /**
     * Creates a pattern list from a range of sites and then keeps only the
     * {@code subSet}-th of {@code subSetCount} contiguous slices of the result.
     *
     * @throws IllegalArgumentException if {@code subSetCount > 0} and
     *                                  {@code subSet} is not in {@code [0, subSetCount)}
     */
    public Patterns(SiteList siteList, int from, int to, int every, int subSet, int subSetCount) {
        addPatterns(siteList, from, to, every);
        subSetPatterns(subSet, subSetCount);
    }

    /**
     * Creates a pattern list holding the patterns of another pattern list.
     */
    public Patterns(PatternList patternList) {
        addPatterns(patternList);
    }

    /**
     * Creates a pattern list from another pattern list and then keeps only the
     * {@code subSet}-th of {@code subSetCount} contiguous slices of the result.
     *
     * @throws IllegalArgumentException if {@code subSetCount > 0} and
     *                                  {@code subSet} is not in {@code [0, subSetCount)}
     */
    public Patterns(PatternList patternList, int subSet, int subSetCount) {
        addPatterns(patternList);
        subSetPatterns(subSet, subSetCount);
    }

    /**
     * Cuts the stored patterns down to one contiguous slice. The patterns are
     * divided into {@code subSetCount} slices of near-equal size (the first
     * {@code patternCount % subSetCount} slices hold one extra pattern) and
     * only slice number {@code subSet} (zero-based) is kept.
     * Does nothing when {@code subSetCount <= 0}.
     *
     * @throws IllegalArgumentException if {@code subSetCount > 0} and
     *                                  {@code subSet} is not in {@code [0, subSetCount)}
     */
    private void subSetPatterns(int subSet, int subSetCount) {
        if (subSetCount <= 0) {
            return;
        }
        // An out-of-range slice index would read past the stored patterns
        // (null slots or off the end of the array), so reject it up front.
        if (subSet < 0 || subSet >= subSetCount) {
            throw new IllegalArgumentException("subSet (" + subSet
                    + ") must be in the range [0, " + subSetCount + ")");
        }

        int div = patternCount / subSetCount;
        int rem = patternCount % subSetCount;

        // Every preceding slice has 'div' patterns, and the first 'rem' of them have one more.
        int start = subSet * div + Math.min(subSet, rem);
        int newPatternCount = div + (subSet < rem ? 1 : 0);

        int[][] newPatterns = new int[newPatternCount][];
        double[] newWeights = new double[newPatternCount];
        System.arraycopy(patterns, start, newPatterns, 0, newPatternCount);
        System.arraycopy(weights, start, newWeights, 0, newPatternCount);

        patterns = newPatterns;
        weights = newWeights;
        patternCount = newPatternCount;
    }

    /**
     * Adds patterns to the list from a SiteList, each with weight 1.
     * A {@code null} site list is ignored. If this object has no taxon list or
     * data type yet, they are taken from {@code siteList}. Site patterns that
     * are {@code null}, or that are invariant and contain a gap, ambiguous or
     * unknown state, are skipped.
     *
     * @param siteList the source of site patterns
     * @param from     first site index; negative values are treated as 0
     * @param to       last site index (inclusive); values {@code <= 0} mean the last site
     * @param every    step between sites; values {@code <= 0} are treated as 1
     * @throws IllegalArgumentException if this object already has a data type
     *                                  and it is not the same instance as that of {@code siteList}
     */
    public void addPatterns(SiteList siteList, int from, int to, int every) {
        if (siteList == null) {
            return;
        }

        if (taxonList == null) {
            taxonList = siteList;
            patternLength = taxonList.getTaxonCount();
        }

        // NOTE(review): data types are compared by reference, which presumes
        // they are shared instances - confirm against DataType implementations.
        if (dataType == null) {
            dataType = siteList.getDataType();
        } else if (dataType != siteList.getDataType()) {
            throw new IllegalArgumentException("Patterns' existing DataType does not match that of added SiteList");
        }

        if (from < 0) {
            from = 0;
        }
        if (to <= 0) {
            to = siteList.getSiteCount() - 1;
        }
        if (every <= 0) {
            every = 1;
        }

        for (int i = from; i <= to; i += every) {
            int[] pattern = siteList.getSitePattern(i);
            if (isWorthAdding(pattern)) {
                addPattern(pattern, 1.0);
            }
        }

        areUnique = siteList.areUnique();
    }

    /**
     * Adds patterns to the list from a PatternList, keeping each pattern's weight.
     * A {@code null} pattern list is ignored. If this object has no taxon list
     * or data type yet, they are taken from {@code patternList}. Patterns that
     * are {@code null}, or that are invariant and contain a gap, ambiguous or
     * unknown state, are skipped.
     *
     * @throws IllegalArgumentException if this object already has a data type
     *                                  and it is not the same instance as that of {@code patternList}
     */
    public void addPatterns(PatternList patternList) {
        if (patternList == null) {
            return;
        }

        if (taxonList == null) {
            taxonList = patternList;
            patternLength = taxonList.getTaxonCount();
        }

        if (dataType == null) {
            dataType = patternList.getDataType();
        } else if (dataType != patternList.getDataType()) {
            throw new IllegalArgumentException("Patterns' existing DataType does not match that of added PatternList");
        }

        for (int i = 0; i < patternList.getPatternCount(); i++) {
            int[] pattern = patternList.getPattern(i);
            if (isWorthAdding(pattern)) {
                addPattern(pattern, patternList.getPatternWeight(i));
            }
        }

        areUnique = patternList.areUnique();
    }

    /**
     * Shared filter for the bulk add methods: rejects {@code null} patterns and
     * patterns that are all gaps, all ambiguous or all unknown (i.e. invariant
     * patterns containing such a state).
     */
    private boolean isWorthAdding(int[] pattern) {
        if (pattern == null) {
            return false;
        }
        return !isInvariant(pattern)
                || (!isGapped(pattern) && !isAmbiguous(pattern) && !isUnknown(pattern));
    }

    /**
     * adds a pattern to the pattern list with a default weight of 1
     */
    public void addPattern(int[] pattern) {
        addPattern(pattern, 1.0);
    }

    /**
     * Adds a pattern to the pattern list. If an identical pattern is already
     * stored, its weight is increased by {@code weight}; otherwise the given
     * array itself (not a copy) is stored as a new entry.
     *
     * @throws IllegalArgumentException if the pattern's length differs from
     *                                  the established pattern length
     */
    public void addPattern(int[] pattern, double weight) {
        // The first pattern ever added fixes the pattern length.
        if (patternLength == 0) {
            patternLength = pattern.length;
        }

        if (pattern.length != patternLength) {
            throw new IllegalArgumentException("Added pattern's length (" + pattern.length + ") does not match those of existing patterns (" + patternLength + ")");
        }

        for (int i = 0; i < patternCount; i++) {
            if (comparePatterns(patterns[i], pattern)) {
                weights[i] += weight;
                return;
            }
        }

        if (patternCount == patterns.length) {
            int newCapacity = patternCount + COUNT_INCREMENT;
            int[][] newPatterns = new int[newCapacity][];
            double[] newWeights = new double[newCapacity];

            System.arraycopy(patterns, 0, newPatterns, 0, patternCount);
            System.arraycopy(weights, 0, newWeights, 0, patternCount);

            patterns = newPatterns;
            weights = newWeights;
        }

        patterns[patternCount] = pattern;
        weights[patternCount] = weight;
        patternCount++;
    }

    /**
     * Removes one occurrence of a pattern: the matching entry's weight is
     * reduced by 1 and, if it reaches exactly 0 and other patterns remain, the
     * entry is dropped (the last stored pattern is moved into its slot).
     *
     * @throws IllegalArgumentException if no identical pattern is stored
     */
    public void removePattern(int[] pattern) {
        int index = -1;
        for (int i = 0; i < patternCount; i++) {
            if (comparePatterns(patterns[i], pattern)) {
                index = i;
                break;
            }
        }

        if (index == -1) {
            throw new IllegalArgumentException("Pattern not found");
        }

        weights[index] -= 1;

        // NOTE(review): the sole remaining pattern is kept (with weight 0)
        // rather than removed; original behaviour preserved - confirm intent.
        if (weights[index] == 0 && patternCount > 1) {
            int last = patternCount - 1;
            patterns[index] = patterns[last];
            weights[index] = weights[last];
            // Clear the vacated slot so no stale pattern or weight lingers.
            patterns[last] = null;
            weights[last] = 0.0;
            patternCount--;
        }
    }

    /**
     * removes all patterns from the pattern list
     */
    public void removeAllPatterns() {
        patternCount = 0;
        for (int i = 0; i < patterns.length; i++) {
            patterns[i] = null;
        }
        // Reset weights too, so getPatternWeights() does not expose stale values.
        for (int i = 0; i < weights.length; i++) {
            weights[i] = 0.0;
        }
    }

    /**
     * @return true if the pattern has one or more gaps
     */
    protected boolean isGapped(int[] pattern) {
        for (int i = 0; i < pattern.length; i++) {
            if (getDataType().isGapState(pattern[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return true if the pattern has one or more ambiguous states
     */
    protected boolean isAmbiguous(int[] pattern) {
        for (int i = 0; i < pattern.length; i++) {
            if (getDataType().isAmbiguousState(pattern[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return true if the pattern has one or more unknown states
     */
    protected boolean isUnknown(int[] pattern) {
        for (int i = 0; i < pattern.length; i++) {
            if (getDataType().isUnknownState(pattern[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return true if the pattern is invariant, i.e. every position holds the
     *         same state; an empty pattern counts as invariant
     */
    protected static boolean isInvariant(int[] pattern) {
        if (pattern.length == 0) {
            return true;
        }
        int state = pattern[0];
        for (int i = 1; i < pattern.length; i++) {
            if (pattern[i] != state) {
                return false;
            }
        }
        return true;
    }

    /**
     * compares two patterns
     *
     * @return true if they are identical (same length and same state at every position)
     */
    protected boolean comparePatterns(int[] pattern1, int[] pattern2) {
        // Without this check a longer pattern2 sharing a prefix would compare
        // equal, and a shorter one would index out of bounds.
        if (pattern1.length != pattern2.length) {
            return false;
        }
        for (int i = 0; i < pattern1.length; i++) {
            if (pattern1[i] != pattern2[i]) {
                return false;
            }
        }
        return true;
    }

    // **************************************************************
    // PatternList IMPLEMENTATION
    // **************************************************************

    /**
     * @return number of patterns
     */
    public int getPatternCount() {
        return patternCount;
    }

    /**
     * @return number of states for this siteList
     */
    public int getStateCount() {
        return dataType.getStateCount();
    }

    /**
     * Gets the length of the pattern strings which will usually be the
     * same as the number of taxa
     *
     * @return the length of patterns
     */
    public int getPatternLength() {
        return patternLength;
    }

    /**
     * Gets the pattern as an array of state numbers (one per sequence).
     * The internal array is returned, not a copy.
     *
     * @return the pattern at patternIndex
     */
    public int[] getPattern(int patternIndex) {
        return patterns[patternIndex];
    }

    /**
     * @return state at (taxonIndex, patternIndex)
     */
    public int getPatternState(int taxonIndex, int patternIndex) {
        return patterns[patternIndex][taxonIndex];
    }

    /**
     * Gets the weight of a site pattern
     */
    public double getPatternWeight(int patternIndex) {
        return weights[patternIndex];
    }

    /**
     * Returns a copy of the internal weight array. The copy has the array's
     * full current capacity, which may exceed {@link #getPatternCount()}; only
     * the first {@code getPatternCount()} entries belong to stored patterns.
     *
     * @return the array of pattern weights
     */
    public double[] getPatternWeights() {
        return weights.clone();
    }

    /**
     * @return the DataType of this siteList
     */
    public DataType getDataType() {
        return dataType;
    }

    /**
     * @return the frequency of each state, as computed by
     *         {@code PatternList.Utils.empiricalStateFrequencies}
     */
    public double[] getStateFrequencies() {
        return PatternList.Utils.empiricalStateFrequencies(this);
    }

    @Override
    public boolean areUnique() {
        return areUnique;
    }

    // **************************************************************
    // TaxonList IMPLEMENTATION
    // **************************************************************

    /**
     * @return the taxon list backing this object
     * @throws RuntimeException if no taxon list has been set
     */
    private TaxonList requireTaxonList() {
        if (taxonList == null) {
            throw new RuntimeException("Patterns has no TaxonList");
        }
        return taxonList;
    }

    /**
     * @return a count of the number of taxa in the list.
     * @throws RuntimeException if no taxon list has been set
     */
    public int getTaxonCount() {
        return requireTaxonList().getTaxonCount();
    }

    /**
     * @return the ith taxon.
     * @throws RuntimeException if no taxon list has been set
     */
    public Taxon getTaxon(int taxonIndex) {
        return requireTaxonList().getTaxon(taxonIndex);
    }

    /**
     * @return the ID of the ith taxon.
     * @throws RuntimeException if no taxon list has been set
     */
    public String getTaxonId(int taxonIndex) {
        return requireTaxonList().getTaxonId(taxonIndex);
    }

    /**
     * returns the index of the taxon with the given id.
     *
     * @throws RuntimeException if no taxon list has been set
     */
    public int getTaxonIndex(String id) {
        return requireTaxonList().getTaxonIndex(id);
    }

    /**
     * returns the index of the given taxon.
     *
     * @throws RuntimeException if no taxon list has been set
     */
    public int getTaxonIndex(Taxon taxon) {
        return requireTaxonList().getTaxonIndex(taxon);
    }

    /**
     * @return the taxa as a list, as provided by the underlying taxon list
     * @throws RuntimeException if no taxon list has been set
     */
    public List<Taxon> asList() {
        return requireTaxonList().asList();
    }

    /**
     * @return an iterator over the taxa, as provided by the underlying taxon list
     * @throws RuntimeException if no taxon list has been set
     */
    public Iterator<Taxon> iterator() {
        return requireTaxonList().iterator();
    }

    /**
     * @param taxonIndex the index of the taxon whose attribute is being fetched.
     * @param name       the name of the attribute of interest.
     * @return an object representing the named attribute for the given taxon.
     * @throws RuntimeException if no taxon list has been set
     */
    public Object getTaxonAttribute(int taxonIndex, String name) {
        return requireTaxonList().getTaxonAttribute(taxonIndex, name);
    }

    // **************************************************************
    // Identifiable IMPLEMENTATION
    // **************************************************************

    /**
     * @return the id.
     */
    public String getId() {
        return id;
    }

    /**
     * Sets the id.
     */
    public void setId(String id) {
        this.id = id;
    }

    // ========= Mask =========
    // Masked indices, e.g. the index of a taxon whose state is the unknown
    // character in microsatellite data.

    /**
     * Masks the given index.
     *
     * @return true if the index was not already masked
     */
    public boolean addMask(int index) {
        return maskSet.add(index);
    }

    /**
     * @return true if the given index is masked
     */
    public boolean isMasked(int index) {
        return maskSet.contains(index);
    }

    /**
     * @return true if at least one index is masked
     */
    public boolean hasMask() {
        return !maskSet.isEmpty();
    }

    /**
     * Removes every mask.
     */
    public void clearMask() {
        maskSet.clear();
    }

    /**
     * @return the internal set of masked indices (not a copy)
     */
    public Set<Integer> getMaskSet() {
        return maskSet;
    }

    /**
     * @return the ith taxon, or null if that index is masked.
     * @throws RuntimeException if no taxon list has been set
     */
    public Taxon getTaxonMasked(int taxonIndex) {
        TaxonList taxa = requireTaxonList();
        if (isMasked(taxonIndex)) {
            return null;
        }
        return taxa.getTaxon(taxonIndex);
    }
}