/*
 * File Sequence.java
 *
 * Copyright (C) 2010 Remco Bouckaert remco@cs.auckland.ac.nz
 *
 * This file is part of BEAST2.
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership and licensing.
 *
 * BEAST is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * BEAST is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with BEAST; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301 USA
 */
package beast.evolution.alignment;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import beast.core.BEASTObject;
import beast.core.Description;
import beast.core.Input;
import beast.evolution.datatype.DataType;

/**
 * A single named sequence of an alignment.
 *
 * <p>The sequence data is held as a string in the {@code value} input. When the
 * {@code uncertain} input is {@code true}, that string is parsed by
 * {@link #initProbabilities()} into a table of per-site values (sites separated by
 * {@code ;}, entries within a site separated by {@code ,}) that is available through
 * {@link #getLikelihoods()}.
 */
@Description("Single sequence in an alignment.")
public class Sequence extends BEASTObject {

    public final Input<Integer> totalCountInput = new Input<>("totalcount", "number of states or the number of lineages for this species in SNAPP analysis");
    public final Input<String> taxonInput = new Input<>("taxon", "name of this species", Input.Validate.REQUIRED);
    public final Input<String> dataInput = new Input<>("value",
            "sequence data, either encoded as a string or as comma separated list of integers, or comma separated likelihoods/probabilities for each site if uncertain=true."
                    + "In either case, whitespace is ignored.", Input.Validate.REQUIRED);
    public final Input<Boolean> uncertainInput = new Input<>("uncertain", "if true, sequence is provided as comma separated probabilities for each character, with sites separated by a semi-colons. In this formulation, gaps are coded as 1/K,...,1/K, where K is the number of states in the model.");

    /** Whether the data is interpreted as per-site likelihoods; set by {@link #initAndValidate()}. */
    protected boolean uncertain = false;

    /** Parsed table indexed as {@code [site][entry]}; {@code null} until {@link #initProbabilities()} has run. */
    protected double[][] likelihoods = null;

    /**
     * @return the table built by {@link #initProbabilities()}, indexed as {@code [site][entry]},
     *         or {@code null} if it has not been built. The internal array is returned, not a copy.
     */
    public double[][] getLikelihoods() {
        return likelihoods;
    }

    public Sequence() {
    }

    /**
     * Constructor for testing. Sets the taxon and data inputs and then runs
     * {@link #initAndValidate()}.
     *
     * @param taxon    value for the {@code taxon} input
     * @param sequence value for the {@code value} (sequence data) input
     */
    public Sequence(String taxon, String sequence) {
        taxonInput.setValue(taxon, this);
        dataInput.setValue(sequence, this);
        initAndValidate();
    }

    /**
     * Reads the {@code uncertain} input and, when it is set to {@code true}, parses the
     * data into the likelihood table. When the input is unset, the {@code uncertain}
     * field keeps its current value.
     */
    @Override
    public void initAndValidate() {
        final Boolean uncertainFlag = uncertainInput.get();
        if (uncertainFlag != null) {
            uncertain = uncertainFlag;
            if (uncertain) {
                initProbabilities();
            }
        }
    } // initAndValidate

    /**
     * Parses the data input into the {@link #getLikelihoods() likelihood table}.
     *
     * <p>All whitespace is removed first. The remainder is split on {@code ;} into sites and
     * each site on {@code ,} into entries, each parsed with {@link Double#parseDouble(String)}.
     * The table has one row per site; its width is the number of entries of the first site
     * that has any entries. Sites with fewer entries keep {@code 0.0} in the remaining columns.
     *
     * <p>The table is rebuilt from scratch on every call, so repeated initialisation with
     * changed data never reuses the dimensions or contents of an earlier parse.
     *
     * @throws NumberFormatException    if an entry is not a parsable number
     * @throws IllegalArgumentException if a site has more entries than the table width, or if
     *                                  the data contains no entries at all
     */
    public void initProbabilities() {
        // Whitespace carries no meaning in this encoding, so drop all of it before splitting.
        final String stripped = dataInput.get().replaceAll("\\s", "").trim();
        final String[] sites = stripped.split(";");

        // Build into a local table and publish it only once parsing has succeeded.
        double[][] parsed = null;
        for (int site = 0; site < sites.length; site++) {
            final String[] entries = sites[site].split(",");
            if (entries.length == 0) {
                // String.split drops trailing empty strings, so a site consisting only of
                // commas yields no entries; its row (if any) is left at 0.0.
                continue;
            }
            if (parsed == null) {
                parsed = new double[sites.length][entries.length];
            }
            final double[] row = parsed[site];
            if (entries.length > row.length) {
                throw new IllegalArgumentException("Site " + (site + 1) + " of sequence '" + taxonInput.get()
                        + "' has " + entries.length + " entries, but at most " + row.length + " were expected");
            }
            for (int state = 0; state < entries.length; state++) {
                row[state] = Double.parseDouble(entries[state].trim());
            }
        }

        if (parsed == null) {
            throw new IllegalArgumentException(
                    "No likelihoods found in the data of uncertain sequence '" + taxonInput.get() + "'");
        }
        likelihoods = parsed;
    }

    /**
     * Returns this sequence as a list of integer states.
     *
     * <p>For an uncertain sequence, each site contributes the column index of the largest
     * value in its likelihood row; on ties the lowest index wins. Otherwise the data string,
     * with all whitespace removed, is converted by {@code dataType.string2state}.
     *
     * <p>Side effect: if the {@code totalcount} input has no value, it is set to
     * {@code dataType.getStateCount()}.
     *
     * @param dataType data type used to decode the data string and to supply the default
     *                 total count
     * @return one integer state per site
     */
    public List<Integer> getSequence(DataType dataType) {
        final List<Integer> sequence;
        if (uncertain) {
            sequence = new ArrayList<>(likelihoods.length);
            for (final double[] row : likelihoods) {
                int best = 0;
                double max = row[0];
                // Strict comparison keeps the first (lowest-index) maximum on ties.
                for (int state = 1; state < row.length; state++) {
                    if (row[state] > max) {
                        max = row[state];
                        best = state;
                    }
                }
                sequence.add(best);
            }
        } else {
            // remove spaces
            final String data = dataInput.get().replaceAll("\\s", "");
            sequence = dataType.string2state(data);
        }

        if (totalCountInput.get() == null) {
            // derive default from char-map
            totalCountInput.setValue(dataType.getStateCount(), this);
        }
        return sequence;
    }

    /**
     * @return the taxon of this sequence as a string.
     */
    public final String getTaxon() {
        return taxonInput.get();
    }

    /**
     * @return the data of this sequence as a string.
     */
    public final String getData() {
        return dataInput.get();
    }

    /**
     * Looks up a character in a data map string.
     *
     * @param dataMap string whose character positions are the codes
     * @param c       character to look up
     * @return the index of the first occurrence of {@code c} in {@code dataMap}, or
     *         {@code dataMap.length()} if it does not occur
     */
    int mapCharToData(String dataMap, char c) {
        final int position = dataMap.indexOf(c);
        return position >= 0 ? position : dataMap.length();
    } // mapCharToData

    /**
     * @param id        of target sequence
     * @param sequences a collection of sequences
     * @return the first sequence in the collection whose taxon equals the given ID, or null
     *         if it is not in the collection.
     */
    public static Sequence getSequenceByTaxon(String id, Collection<Sequence> sequences) {
        for (final Sequence candidate : sequences) {
            if (candidate.getTaxon().equals(id)) {
                return candidate;
            }
        }
        return null;
    }

    @Override
    public String toString() {
        return getTaxon() + ":" + getData();
    }

} // class Sequence