/*
* SimpleAlignment.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.evolution.alignment;
import dr.app.bss.XMLExporter;
import dr.app.tools.NexusExporter;
import dr.evolution.datatype.Codons;
import dr.evolution.datatype.DataType;
import dr.evolution.datatype.GeneralDataType;
import dr.evolution.sequence.Sequence;
import dr.evolution.sequence.Sequences;
import dr.evolution.util.Taxon;
import dr.evolution.util.TaxonList;
import dr.util.NumberFormatter;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Collections;
import java.util.List;
/**
 * A simple alignment class that implements gaps by characters in the sequences.
 * <p>
 * Every site is exposed as its own pattern with weight 1.0, so the pattern
 * count always equals the site count. Sequences shorter than the longest one
 * are reported as gap states beyond their own length.
 *
 * @author Andrew Rambaut
 * @author Alexei Drummond
 * @version $Id: SimpleAlignment.java,v 1.46 2005/06/21 16:25:15 beth Exp $
 */
@SuppressWarnings("serial")
public class SimpleAlignment extends Sequences implements Alignment, dr.util.XHTMLable {

    // **************************************************************
    // INSTANCE VARIABLES
    // **************************************************************

    /** Format used by {@link #toString()}. */
    private OutputType outputType = OutputType.FASTA;

    /** Data type shared by all sequences; null until set or taken from the first sequence. */
    private DataType dataType = null;

    /** Cached length of the longest sequence; only meaningful when siteCountKnown is true. */
    private int siteCount = 0;
    private boolean siteCountKnown = false;

    /**
     * Explicit choice made through {@link #setReportCountStatistics(boolean)},
     * or null when the data-type dependent default applies. The default cannot
     * be computed in a field initializer because dataType is still null then.
     */
    private Boolean countStatistics = null;

    // **************************************************************
    // SimpleAlignment METHODS
    // **************************************************************

    /**
     * parameterless constructor.
     */
    public SimpleAlignment() {
    }

    /**
     * Constructs a sub alignment based on the provided taxa.
     * <p>
     * The sequences are looked up in {@code a} by taxon and added in the order
     * of {@code taxa}. The Sequence objects themselves are shared with
     * {@code a}, not copied.
     * NOTE(review): a taxon that is absent from {@code a} is not handled here;
     * the outcome depends on what a.getTaxonIndex/a.getSequence do in that case.
     *
     * @param a    the alignment supplying the sequences
     * @param taxa the taxa whose sequences make up the new alignment
     */
    public SimpleAlignment(Alignment a, TaxonList taxa) {
        for (int i = 0; i < taxa.getTaxonCount(); i++) {
            Taxon taxon = taxa.getTaxon(i);
            Sequence sequence = a.getSequence(a.getTaxonIndex(taxon));
            addSequence(sequence);
        }
    }

    /**
     * Selects the format produced by {@link #toString()}.
     *
     * @param out the output format to use
     */
    public void setOutputType(OutputType out) {
        outputType = out;
    }

    /**
     * @return a read-only view of the sequences in this alignment
     */
    public List<Sequence> getSequences() {
        return Collections.unmodifiableList(sequences);
    }

    /**
     * Calculates the siteCount by finding the longest sequence.
     */
    public void updateSiteCount() {
        int longest = 0;
        for (int i = 0, n = getSequenceCount(); i < n; i++) {
            int len = getSequence(i).getLength();
            if (len > longest) {
                longest = len;
            }
        }
        siteCount = longest;
        siteCountKnown = true;
    }

    // **************************************************************
    // Alignment IMPLEMENTATION
    // **************************************************************

    /**
     * Sets the dataType of this alignment. This should be the same as
     * the sequences.
     */
    public void setDataType(DataType dataType) {
        this.dataType = dataType;
    }

    /**
     * The dataType argument is ignored; this simply delegates to
     * {@link #getSiteCount()}.
     *
     * @return number of sites
     */
    public int getSiteCount(DataType dataType) {
        return getSiteCount();
    }

    /**
     * sequence character at (sequence, site)
     */
    public char getChar(int sequenceIndex, int siteIndex) {
        return getSequence(sequenceIndex).getChar(siteIndex);
    }

    /**
     * Returns string representation of single sequence in
     * alignment with gap characters included.
     */
    public String getAlignedSequenceString(int sequenceIndex) {
        return getSequence(sequenceIndex).getSequenceString();
    }

    /**
     * Returns string representation of single sequence in
     * alignment with gap characters excluded.
     */
    public String getUnalignedSequenceString(int sequenceIndex) {
        StringBuilder unaligned = new StringBuilder();
        for (int i = 0, n = getSiteCount(); i < n; i++) {
            int state = getState(sequenceIndex, i);
            if (!dataType.isGapState(state)) {
                unaligned.append(dataType.getChar(state));
            }
        }
        return unaligned.toString();
    }

    // **************************************************************
    // Sequences METHODS
    // **************************************************************

    /**
     * Add a sequence to the sequence list
     *
     * @throws IllegalArgumentException if the sequence's data type differs from
     *                                  the alignment's, or the sequence contains
     *                                  a character that is invalid for the data type
     */
    public void addSequence(Sequence sequence) {
        reconcileAndValidate(sequence);
        super.addSequence(sequence);
        updateSiteCount();
    }

    /**
     * Insert a sequence to the sequence list at position
     *
     * @throws IllegalArgumentException if the sequence's data type differs from
     *                                  the alignment's, or the sequence contains
     *                                  a character that is invalid for the data type
     */
    public void insertSequence(int position, Sequence sequence) {
        reconcileAndValidate(sequence);
        super.insertSequence(position, sequence);
        // Keep the cached site count in step with the contents, as addSequence does.
        updateSiteCount();
    }

    /**
     * Brings the data types of the alignment and an incoming sequence into
     * agreement and checks the sequence's characters against that data type.
     * Shared by addSequence and insertSequence.
     */
    private void reconcileAndValidate(Sequence sequence) {
        DataType sequenceType = sequence.getDataType();
        if (dataType == null) {
            if (sequenceType == null) {
                // Neither side knows the type yet: guess it from the sequence.
                dataType = sequence.guessDataType();
                sequence.setDataType(dataType);
            } else {
                setDataType(sequenceType);
            }
        } else if (sequenceType == null) {
            sequence.setDataType(dataType);
        } else if (dataType != sequenceType) {
            throw new IllegalArgumentException("Sequence's dataType does not match the alignment's");
        }

        int invalidCharAt = getInvalidChar(sequence.getSequenceString(), dataType);
        if (invalidCharAt >= 0) {
            throw new IllegalArgumentException("Sequence of " + sequence.getTaxon().getId()
                    + " contains invalid char '" + sequence.getChar(invalidCharAt) + "' at index " + invalidCharAt);
        }
    }

    /**
     * search invalid character in the sequence by given data type, and return its index
     *
     * @return index of the first character not listed by dataType.getValidChars(),
     *         or -1 if there is none or the data type supplies no list of valid characters
     */
    protected int getInvalidChar(String sequence, DataType dataType) {
        final char[] validChars = dataType.getValidChars();
        if (validChars == null) {
            return -1;
        }
        String validString = new String(validChars);
        for (int i = 0; i < sequence.length(); i++) {
            if (validString.indexOf(sequence.charAt(i)) < 0) {
                return i;
            }
        }
        return -1;
    }

    // **************************************************************
    // SiteList IMPLEMENTATION
    // **************************************************************

    /**
     * @return number of sites
     */
    public int getSiteCount() {
        if (!siteCountKnown) {
            updateSiteCount();
        }
        return siteCount;
    }

    /**
     * Gets the pattern of site as an array of state numbers (one per sequence)
     *
     * @return the site pattern at siteIndex
     */
    public int[] getSitePattern(int siteIndex) {
        int n = getSequenceCount();
        int[] pattern = new int[n];
        for (int i = 0; i < n; i++) {
            pattern[i] = getState(i, siteIndex);
        }
        return pattern;
    }

    /**
     * Gets the pattern index at a particular site
     *
     * @return the patternIndex
     */
    public int getPatternIndex(int siteIndex) {
        return siteIndex;
    }

    /**
     * Sites beyond the end of a (shorter) sequence are reported as the gap state.
     *
     * @return the sequence state at (taxon, site)
     */
    public int getState(int taxonIndex, int siteIndex) {
        Sequence seq = getSequence(taxonIndex);
        if (siteIndex >= seq.getLength()) {
            return dataType.getGapState();
        }
        return seq.getState(siteIndex);
    }

    /**
     * Sets the state at (taxon, site).
     *
     * @throws IllegalArgumentException if siteIndex lies beyond the end of the sequence
     */
    public void setState(int taxonIndex, int siteIndex, int state) {
        Sequence seq = getSequence(taxonIndex);
        if (siteIndex >= seq.getLength()) {
            throw new IllegalArgumentException("siteIndex " + siteIndex
                    + " is beyond the end of the sequence (length " + seq.getLength() + ")");
        }
        seq.setState(siteIndex, state);
    }

    // **************************************************************
    // PatternList IMPLEMENTATION
    // **************************************************************

    /**
     * @return number of patterns
     */
    public int getPatternCount() {
        return getSiteCount();
    }

    /**
     * @return number of invariant sites
     */
    public int getInvariantCount() {
        int invariantSites = 0;
        for (int i = 0, n = getSiteCount(); i < n; i++) {
            if (Patterns.isInvariant(getSitePattern(i))) {
                invariantSites++;
            }
        }
        return invariantSites;
    }

    /**
     * @return the number of patterns in a Patterns object built from this alignment
     */
    public int getUniquePatternCount() {
        Patterns patterns = new Patterns(this);
        return patterns.getPatternCount();
    }

    /**
     * @return the summed weight of the patterns in which at least two states
     *         each occur more than once
     */
    public int getInformativeCount() {
        Patterns patterns = new Patterns(this);
        int informativeCount = 0;
        for (int i = 0; i < patterns.getPatternCount(); i++) {
            if (isInformative(patterns.getPattern(i))) {
                informativeCount += patterns.getPatternWeight(i);
            }
        }
        return informativeCount;
    }

    /**
     * @return the summed weight of the patterns that are neither invariant nor informative
     */
    public int getSingletonCount() {
        Patterns patterns = new Patterns(this);
        int singletonCount = 0;
        for (int i = 0; i < patterns.getPatternCount(); i++) {
            int[] pattern = patterns.getPattern(i);
            if (!Patterns.isInvariant(pattern) && !isInformative(pattern)) {
                singletonCount += patterns.getPatternWeight(i);
            }
        }
        return singletonCount;
    }

    /**
     * A pattern is informative when at least two different states each occur
     * more than once in it. State codes outside [0, getStateCount()) are
     * skipped so that they cannot index past the counting array.
     */
    private boolean isInformative(int[] pattern) {
        int[] stateCounts = new int[getStateCount()];
        for (int state : pattern) {
            if (state >= 0 && state < stateCounts.length) {
                stateCounts[state]++;
            }
        }

        int statesSeenRepeatedly = 0;
        for (int count : stateCounts) {
            if (count > 1) {
                statesSeenRepeatedly++;
            }
        }
        return statesSeenRepeatedly >= 2;
    }

    /**
     * @return number of states for this siteList
     */
    public int getStateCount() {
        return getDataType().getStateCount();
    }

    /**
     * Gets the length of the pattern strings which will usually be the
     * same as the number of taxa
     *
     * @return the length of patterns
     */
    public int getPatternLength() {
        return getSequenceCount();
    }

    /**
     * Gets the pattern as an array of state numbers (one per sequence)
     *
     * @return the pattern at patternIndex
     */
    public int[] getPattern(int patternIndex) {
        return getSitePattern(patternIndex);
    }

    /**
     * @return state at (taxonIndex, patternIndex)
     */
    public int getPatternState(int taxonIndex, int patternIndex) {
        return getState(taxonIndex, patternIndex);
    }

    /**
     * Gets the weight of a site pattern (always 1.0)
     */
    public double getPatternWeight(int patternIndex) {
        return 1.0;
    }

    /**
     * @return the array of pattern weights (one entry of 1.0 per site)
     */
    public double[] getPatternWeights() {
        // Go through getSiteCount() so the count is computed if it is not cached yet.
        int n = getSiteCount();
        double[] weights = new double[n];
        for (int i = 0; i < n; i++) {
            weights[i] = 1.0;
        }
        return weights;
    }

    /**
     * @return the DataType of this siteList
     */
    public DataType getDataType() {
        return dataType;
    }

    /**
     * @return the frequency of each state
     */
    public double[] getStateFrequencies() {
        return PatternList.Utils.empiricalStateFrequencies(this);
    }

    @Override
    public boolean areUnique() {
        return false;
    }

    /**
     * Explicitly switches the statistics header of the FASTA output on or off,
     * overriding the data-type dependent default.
     */
    public void setReportCountStatistics(boolean report) {
        countStatistics = Boolean.valueOf(report);
    }

    /**
     * Whether the FASTA output should start with the site statistics. Unless
     * set explicitly, statistics are reported for every data type except
     * Codons and GeneralDataType.
     */
    private boolean isReportingCountStatistics() {
        if (countStatistics != null) {
            return countStatistics.booleanValue();
        }
        return !(dataType instanceof Codons) && !(dataType instanceof GeneralDataType);
    }

    /**
     * @return this alignment rendered in the currently selected output type
     */
    public String toString() {
        return outputType.makeOutputString(this); // generic delegation to ease extensibility
    }// END: toString

    /**
     * @return an XHTML fragment with a summary paragraph followed by the
     *         aligned sequences, one taxon per line, in a pre block
     */
    public String toXHTML() {
        StringBuilder xhtml = new StringBuilder("<p><em>Alignment</em> data type = ");
        xhtml.append(getDataType().getDescription());
        xhtml.append(", no. taxa = ");
        xhtml.append(getTaxonCount());
        xhtml.append(", no. sites = ");
        xhtml.append(getSiteCount());
        xhtml.append("</p>");
        xhtml.append("<pre>");

        int taxonCount = getTaxonCount();
        int maxLength = 0;
        for (int i = 0; i < taxonCount; i++) {
            int length = getTaxonId(i).length();
            if (length > maxLength) {
                maxLength = length;
            }
        }

        for (int i = 0; i < taxonCount; i++) {
            String id = getTaxonId(i);
            xhtml.append(id);
            // Pad to the longest id plus one separating space.
            for (int j = id.length(); j <= maxLength; j++) {
                xhtml.append(" ");
            }
            xhtml.append(getAlignedSequenceString(i)).append("\n");
        }

        xhtml.append("</pre>");
        return xhtml.toString();
    }

    /**
     * The textual formats an alignment can be rendered in. Each constant
     * carries a descriptive name and a file extension and knows how to turn an
     * alignment into a string.
     */
    public enum OutputType {

        /** Optional site statistics followed by one ">name" / sequence pair per taxon. */
        FASTA("fasta", "fsa") {
            @Override
            public String makeOutputString(SimpleAlignment alignment) {
                NumberFormatter formatter = new NumberFormatter(6);
                StringBuilder buffer = new StringBuilder();

                if (alignment.isReportingCountStatistics()) {
                    buffer.append("Site count = ").append(alignment.getSiteCount()).append("\n");
                    buffer.append("Invariant sites = ").append(alignment.getInvariantCount()).append("\n");
                    buffer.append("Singleton sites = ").append(alignment.getSingletonCount()).append("\n");
                    buffer.append("Parsimony informative sites = ").append(alignment.getInformativeCount()).append("\n");
                    buffer.append("Unique site patterns = ").append(alignment.getUniquePatternCount()).append("\n\n");
                }

                for (int i = 0; i < alignment.getSequenceCount(); i++) {
                    String name = formatter.formatToFieldWidth(alignment.getTaxonId(i), 10);
                    buffer.append(">").append(name).append("\n");
                    buffer.append(alignment.getAlignedSequenceString(i)).append("\n");
                }

                return buffer.toString();
            }
        },

        /** Output produced by NexusExporter.exportAlignment. */
        NEXUS("nexus", "nxs") {
            @Override
            public String makeOutputString(SimpleAlignment alignment) {
                StringBuilder buffer = new StringBuilder();
                // NexusExporter needs a PrintStream to be constructed; a throw-away
                // temp file backs it and only the returned string is kept.
                File tmp = null;
                PrintStream ps = null;
                try {
                    tmp = File.createTempFile("tempfile", ".tmp");
                    ps = new PrintStream(tmp);
                    NexusExporter nexusExporter = new NexusExporter(ps);
                    buffer.append(nexusExporter.exportAlignment(alignment));
                } catch (IllegalArgumentException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                } finally {
                    // Release the stream and remove the scratch file.
                    if (ps != null) {
                        ps.close();
                    }
                    if (tmp != null && !tmp.delete()) {
                        tmp.deleteOnExit();
                    }
                }
                return buffer.toString();
            }// END: makeOutputString
        },

        /** Output produced by XMLExporter.exportAlignment. */
        XML("xml", "xml") {
            @Override
            public String makeOutputString(SimpleAlignment alignment) {
                StringBuilder buffer = new StringBuilder();
                try {
                    XMLExporter xmlExporter = new XMLExporter();
                    buffer.append(xmlExporter.exportAlignment(alignment));
                } catch (IllegalArgumentException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                return buffer.toString();
            }// END: makeOutputString
        };

        private final String text;
        private final String extension;

        private OutputType(String text, String extension) {
            this.text = text;
            this.extension = extension;
        }

        /**
         * @return the descriptive name of this format
         */
        public String getText() {
            return text;
        }

        /**
         * @return the file extension associated with this format
         */
        public String getExtension() {
            return extension;
        }

        /**
         * Renders the alignment in this format. The built-in formats print the
         * stack trace of an export failure and return whatever was produced up
         * to that point rather than propagating the exception.
         *
         * @param alignment the alignment to render
         * @return the rendered alignment
         */
        public abstract String makeOutputString(SimpleAlignment alignment);

        /**
         * Finds the format whose name matches the text, ignoring case.
         *
         * @param text the format name; may be null
         * @return the matching format, or null if there is none
         */
        public static OutputType parseFromString(String text) {
            for (OutputType type : OutputType.values()) {
                if (type.getText().equalsIgnoreCase(text)) {
                    return type;
                }
            }
            return null;
        }

        /**
         * Finds the format whose file extension matches, ignoring case.
         *
         * @param extension the file extension; may be null
         * @return the matching format, or null if there is none
         */
        public static OutputType parseFromExtension(String extension) {
            for (OutputType type : OutputType.values()) {
                if (type.getExtension().equalsIgnoreCase(extension)) {
                    return type;
                }
            }
            return null;
        }
    }
}// END: class