/*
* Defects.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.evolution.alignment;
import dr.evolution.datatype.Codons;
import dr.evolution.datatype.Nucleotides;
import dr.evolution.io.Importer;
import dr.evolution.io.NexusImporter;
import dr.evolution.sequence.Sequence;
import dr.evolution.util.TaxonList;
import java.io.EOFException;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
/**
* @author Alexei Drummond
*
* @version $Id: Defects.java,v 1.6 2005/05/23 10:44:07 alexei Exp $
*/
public class Defects {
private final ArrayList<Defect> defects = new ArrayList<Defect>();
private final Set<Integer> defectiveSequences = new HashSet<Integer>();
private final Set<Integer> defectiveSites = new HashSet<Integer>();
int sequenceCount = 0;
static final int STOP = -1;
static final int INDEL = -2;
int totalReads = 0;
public Defects(Alignment alignment) {
Codons codons = Codons.UNIVERSAL;
Nucleotides nucs = Nucleotides.INSTANCE;
Alignment codonAlignment = new ConvertAlignment(codons, alignment);
sequenceCount = codonAlignment.getSequenceCount();
for (int i = 0; i < codonAlignment.getSequenceCount(); i++) {
for (int j = 0; j < codonAlignment.getSiteCount(); j++) {
int state = codonAlignment.getState(i, j);
if (codons.isStopCodon(state)) {
//System.out.print("*");
// System.out.println("Found stop codon: " + triplet + " in sequence " + codonAlignment.getTaxonId(i) + " at site " + j);
addDefect(STOP, i, j);
totalReads += 1;
} else if (codons.isGapState(state)) {
//System.out.print("?");
// go back to original alignment and check
int start = j*3;
int state1 = alignment.getState(i,start);
int state2 = alignment.getState(i,start+1);
int state3 = alignment.getState(i,start+2);
if (nucs.isGapState(state1) && nucs.isGapState(state2) && nucs.isGapState(state3)) {
// real gap
} else {
// defect
// System.out.println("Found gap in sequence " + codonAlignment.getTaxonId(i) + " at site " + j);
addDefect(INDEL, i, j);
//totalReads += 1;
}
} else {
//System.out.print(" ");
totalReads += 1;
}
}
//System.out.println();
}
}
private void addDefect(int code, int sequence, int site) {
defects.add(new Defect(code, sequence, site));
defectiveSequences.add(sequence);
defectiveSites.add(site);
}
public int getDefectiveSequenceCount() {
return defectiveSequences.size();
}
public int getDefectiveSiteCount() {
return defectiveSites.size();
}
public int getDefectiveSites(int sequence) {
int count = 0;
for( Defect defect : defects ) {
if( defect.getSequence() == sequence ) {
count += 1;
}
}
return count;
}
public int getStopSites(int sequence) {
int count = 0;
for( Defect defect : defects ) {
if( defect.getSequence() == sequence && defect.isStop() ) {
count += 1;
}
}
return count;
}
public int getDefectiveSequences(int site) {
int count = 0;
for(Defect defect : defects) {
if( defect.getSequence() == site ) {
count += 1;
}
}
return count;
}
/**
* @return the total number of defects in the alignment
*/
public int getDefectCount() {
return defects.size();
}
public int getStopCodonCount() {
int count = 0;
for( Defect defect : defects ) {
if( defect.isStop() ) {
count += 1;
}
}
return count;
}
public int getSequenceCount(int defectCount) {
int count = 0;
for (int i = 0; i < sequenceCount; i++) {
if (getDefectiveSites(i) == defectCount) {
count += 1;
}
}
return count;
}
/**
* @return the maximum number of defects found in any single sequence.
*/
public int getMaxDefectiveSites() {
int maxDefects = 0;
for (int i = 0; i < sequenceCount; i++) {
int defects = getDefectiveSites(i);
if (defects > maxDefects) {
maxDefects = defects;
}
}
return maxDefects;
}
public int getMaxStopSites() {
int maxStops = 0;
for (int i = 0; i < sequenceCount; i++) {
int stops = getStopSites(i);
if (stops > maxStops) {
maxStops = stops;
}
}
return maxStops;
}
/**
* @param defectCount
* @return A set of integers representing the sequences with the given defect count.
*/
public Set<Integer> getSequences(int defectCount) {
TreeSet<Integer> set = new TreeSet<Integer>();
for (int i = 0; i < sequenceCount; i++) {
if (getDefectiveSites(i) == defectCount) {
set.add(i);
}
}
return set;
}
/**
* @param stopCount
* @return A set of integers representing the sequences with the given defect count.
*/
public Set<Integer> getSequencesByStopCount(int stopCount) {
TreeSet<Integer> set = new TreeSet<Integer>();
for (int i = 0; i < sequenceCount; i++) {
if (getStopSites(i) == stopCount) {
set.add(i);
}
}
return set;
}
public int getStopSequenceCount(int stopCount) {
int count = 0;
for (int i = 0; i < sequenceCount; i++) {
if (getStopSites(i) == stopCount) {
count += 1;
}
}
return count;
}
public int getTotalCodonsMinusGaps() { return totalReads; }
class Defect {
private int type = 0;
private int sequence = 0;
private int site = 0;
public Defect(int type, int sequence, int site) {
this.type = type;
this.sequence = sequence;
this.site = site;
}
public boolean isStop() { return type == STOP; }
public boolean isIndel() { return type == INDEL; }
public int getSequence() { return sequence; }
public int getSite() { return site; }
}
/**
* @param fileName
* @throws java.io.IOException
*/
private static Alignment readNexusFile(String fileName) throws java.io.IOException {
Alignment alignment = null;
TaxonList taxonList = null;
try {
FileReader reader = new FileReader(fileName);
NexusImporter importer = new NexusImporter(reader);
boolean done = false;
while (!done) {
try {
NexusImporter.NexusBlock block = importer.findNextBlock();
if (block == NexusImporter.TAXA_BLOCK) {
if (taxonList != null) {
throw new NexusImporter.MissingBlockException("TAXA block already defined");
}
taxonList = importer.parseTaxaBlock();
} else if (block == NexusImporter.DATA_BLOCK) {
// A data block doesn't need a taxon block before it
// but if one exists then it will use it.
alignment = importer.parseDataBlock(taxonList);
if (taxonList == null) {
taxonList = alignment;
}
} else if (block == NexusImporter.TREES_BLOCK) {
// ignore tree block
} else {
// Ignore the block..
}
} catch (EOFException ex) {
done = true;
}
}
} catch (Importer.ImportException ime) {
System.err.println("Error reading alignment: " + ime);
}
return alignment;
}
private static int[][] generateTransversionMutants(int[] triplet) {
int[][] tvmutants = new int[6][3];
for (int index = 0; index < 6; index += 1) {
for (int pos = 0; pos < 3; pos++) {
tvmutants[index][pos] = triplet[pos];
}
}
int index = 0;
for (int pos = 0; pos < 3; pos++) {
if (triplet[pos] % 2 == 0) {
tvmutants[index++][pos] = 1;
tvmutants[index++][pos] = 3;
} else {
tvmutants[index++][pos] = 0;
tvmutants[index++][pos] = 2;
}
}
return tvmutants;
}
private static int[][] generateTransitionMutants(int[] triplet) {
int[][] timutants = new int[3][3];
for (int index = 0; index < 3; index += 1) {
for (int pos = 0; pos < 3; pos++) {
timutants[index][pos] = triplet[pos];
}
}
for (int pos = 0; pos < 3; pos++) {
switch (triplet[pos]) {
case 0: timutants[pos][pos] = 2; break;
case 1: timutants[pos][pos] = 3; break;
case 2: timutants[pos][pos] = 0; break;
case 3: timutants[pos][pos] = 1; break;
default: throw new IllegalArgumentException();
}
}
return timutants;
}
public static void main(String[] args) throws java.io.IOException {
String alignmentFileName = null;
if (args != null && args.length > 0) {
alignmentFileName = args[0];
}
Alignment alignment = readNexusFile(alignmentFileName);
Defects defects = new Defects(alignment);
int siteCount = alignment.getSiteCount() / 3;
int sequenceCount = alignment.getSequenceCount();
int totalSites = siteCount * sequenceCount;
int totalReads = defects.getTotalCodonsMinusGaps();
int defectiveSites = defects.getDefectiveSiteCount();
int defectiveSequences = defects.getDefectiveSequenceCount();
int totalDefects = defects.getDefectCount();
int totalStops = defects.getStopCodonCount();
double siteP = (double)defectiveSites/(double)siteCount;
double sequenceP = (double)defectiveSequences/(double)sequenceCount;
double totalP = (double)totalDefects/(double)totalReads;
double totalSP = (double)totalStops/(double)totalReads;
System.out.println("Matrix size=" + totalSites);
System.out.println("Non-gap codons=" + totalReads);
System.out.println(defectiveSequences + "/" + sequenceCount + "(" + sequenceP + ") defective sequences.");
System.out.println(defectiveSites + "/" + siteCount + "(" + siteP + ") defective sites.");
System.out.println(totalDefects + "/" + totalReads + "(" + totalP + ") defects.");
System.out.println(totalStops + "/" + totalReads + "(" + totalSP + ") stop codons.");
double mean = (double)totalDefects/(double)sequenceCount;
System.out.println(mean + " defects per sequence");
double meanS = (double)totalStops/(double)sequenceCount;
System.out.println(meanS + " stops per sequence");
int maxDefects = defects.getMaxDefectiveSites();
int maxStops = defects.getMaxStopSites();
System.out.println("defective sequences:");
//Set seqs = defects.getSequences(maxDefects);
//for (Iterator i = seqs.iterator(); i.hasNext();) {
// Integer index = (Integer)i.next();
// String name = alignment.getTaxonId(index.intValue());
// System.out.println(" " + name);
//}
for (int d = 1; d <= maxDefects; d++) {
Set<Integer> seqs = defects.getSequences(d);
for(Integer seq : seqs) {
String name = alignment.getTaxonId(seq);
System.out.println(d + " " + name);
}
}
System.out.println("Defects\tSequences\texpected");
double probTerm;
probTerm = Math.exp(-mean); // probability of 0
for (int i=0; i<10; i++) {
System.out.println(i+"\t" + defects.getSequenceCount(i) + "\t" + probTerm*sequenceCount);
// compute probability of n from prob. of n-1
probTerm *= mean / (i+1);
}
System.out.println("Stops\tSequences\texpected");
probTerm = Math.exp(-meanS); // probability of 0
for (int i=0; i<10; i++) {
System.out.println(i+"\t" + defects.getStopSequenceCount(i) + "\t" + probTerm*sequenceCount);
// compute probability of n from prob. of n-1
probTerm *= meanS / (i+1);
}
System.out.println("stop-codon sequences:");
for (int d = 1; d <= maxStops; d++) {
Set<Integer> seqs = defects.getSequencesByStopCount(d);
for(Integer index : seqs) {
String name = alignment.getTaxonId(index);
System.out.println(d + " " + name);
}
}
Consensus con = new Consensus("mode", alignment, true);
Sequence consensus = con.getConsensusSequence();
SimpleAlignment a = new SimpleAlignment();
a.addSequence(new Sequence(consensus));
ConvertAlignment ca = new ConvertAlignment(Codons.UNIVERSAL, a);
double[] pstop = new double[ca.getSiteCount()];
int[] counts = new int[10];
for (double kappa = 2.0; kappa <= 2.0; kappa += 1.0) {
for (int i = 0; i < ca.getSiteCount(); i++) {
int state = ca.getState(0,i);
if (Codons.UNIVERSAL.isStopCodon(state)) {
throw new RuntimeException("Consensus has a stop codon in it at position " + i + "!");
}
int[] triplet = Codons.UNIVERSAL.getTripletStates(state);
int[][] tvmutants = generateTransversionMutants(triplet);
int[][] timutants = generateTransitionMutants(triplet);
int tvStops = 0;
for (int j = 0; j < 6; j++) {
if (Codons.UNIVERSAL.isStopCodon(Codons.UNIVERSAL.getState(tvmutants[j][0],tvmutants[j][1],tvmutants[j][2]))) {
tvStops += 1;
}
}
int tiStops = 0;
for (int j = 0; j < 3; j++) {
if (Codons.UNIVERSAL.isStopCodon(Codons.UNIVERSAL.getState(timutants[j][0],timutants[j][1],timutants[j][2]))) {
tiStops += 1;
}
}
pstop[i] = (tvStops + kappa*tiStops) / (6.0 + kappa*3.0);
counts[tvStops+tiStops] += 1;
}
System.out.println("kappa = " + kappa + " pstop=" + dr.stats.DiscreteStatistics.mean(pstop));
}
System.out.println("stop-mutations\tcodons");
for (int i = 0; i < 10; i++) {
System.out.println(i+"\t" + counts[i]);
}
/*int count = 0;
int reps = 100000;
for (int i = 0; i < reps; i++) {
SimpleAlignment simpleAlignment = new SimpleAlignment();
simpleAlignment.addSequence(new Sequence(consensus));
//pick a random position
int pos = random.nextInt(simpleAlignment.getSiteCount());
int state = simpleAlignment.getState(0,pos);
int newState = random.nextInt(4);
while (newState == state) {
newState = random.nextInt(4);
}
simpleAlignment.setState(0,pos,newState);
defects = new Defects(simpleAlignment);
count += defects.getDefectCount();
}
double p = (double)count/(double)reps;
*/
double p = dr.stats.DiscreteStatistics.mean(pstop);
double rate = totalSP * (1.0-p) / p;
System.out.println("Total inferred point-mutation rate = " + rate);
System.out.println("Total inferred point-mutations = " + (totalStops * (1-p) / p));
System.out.println("Proportion of point-mutations that produce premature stop codons = " + p);
}
}