/*
Copyright (C) 2009 Diego Darriba, Federico Abascal
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package es.uvigo.darwin.prottest.util;
import static es.uvigo.darwin.prottest.global.ApplicationGlobals.*;
import java.io.PrintWriter;
import pal.alignment.Alignment;
import es.uvigo.darwin.prottest.util.printer.ProtTestFormattedOutput;
/**
* The Class ProtTestAlignment provides some operations about Alignment, as
* could be the empirical frequencies or the sample size calculation.
*
* @author Federico Abascal
* @author Diego Darriba
* @since 3.0
*/
public abstract class ProtTestAlignment {
/**
* Calculate sample size.
*
* @param alignment
* the alignment to calculate sample size
* @return the sample size
*/
public static double calculateSampleSize(Alignment alignment) {
// For AICc and BIC frameworks
return alignment.getSiteCount();
}
/**
* Calculate invariable sites.
*
* @param alignment
* the alignment
* @param verbose
* the verbose
*
* @return the int
*/
public static int calculateInvariableSites(Alignment alignment,
boolean verbose) {
// use this function to estimate a good starting value for the
// InvariableSites distribution.
int numSites = alignment.getSiteCount();
int numSeqs = alignment.getSequenceCount();
int inv = 0;
int tmp;
boolean tmpInv = true;
for (int i = 0; i < numSites; i++) {
tmp = indexOfChar(alignment.getData(0, i));
tmpInv = true;
for (int j = 0; j < numSeqs; j++) {
if (indexOfChar(alignment.getData(j, i)) != tmp) { // if at
// least one
// difference
// in column
// i:
j = numSeqs; // we exit this for.
tmpInv = false; // i is not an invariable site.
}
}
if (tmpInv)
inv++;
}
if (verbose)
System.out.println("Observed number of invariant sites: " + inv);
return inv;
}
/**
* Gets the frequencies.
*
* @param alignment
* the alignment
*
* @return the frequencies
*/
public static double[] getFrequencies(Alignment alignment) {
int numSites = alignment.getSiteCount();
int numSeqs = alignment.getSequenceCount();
double freqs[] = new double[AMINOACID_NUM_STATES];
int aas[] = new int[AMINOACID_NUM_STATES];
int total_aas = 0;
for (int i = 0; i < AMINOACID_NUM_STATES; i++)
aas[i] = 0;
for (int i = 0; i < numSites; i++)
for (int j = 0; j < numSeqs; j++) {
int index = indexOfChar(alignment.getData(j, i));
if (index >= 0) {
aas[index]++;
total_aas++;
}
}
for (int i = 0; i < AMINOACID_NUM_STATES; i++)
freqs[i] = (double) aas[i] / (double) total_aas;
return freqs;
}
/**
* Prints the frequencies.
*
* @param freqs
* the freqs
* @param out
* the out
*/
public static void printFrequencies(double[] freqs, PrintWriter out) {
out.println("Observed aminoacid frequencies:");
for (int i = 0; i < AMINOACID_NUM_STATES; i++) {
char aa = charOfIndex(i);
out.print(" " + aa + ": "
+ ProtTestFormattedOutput.getDecimalString(freqs[i], 3)
+ " ");
if ((i + 1) % 5 == 0)
out.println("");
}
}
/**
* Char of index.
*
* @param c
* the c
*
* @return the char
*/
public static char charOfIndex(int c) {
char[] charSet = { 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y' };
if (c >= 0 && c <= charSet.length)
return charSet[c];
else {
return '?';
}
}
/**
* Index of char.
*
* @param c
* the c
*
* @return the int
*/
private static int indexOfChar(char c) {
char[] charSet = { 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y' };
for (int charIndex = 0; charIndex < charSet.length; charIndex++)
if (charSet[charIndex] == c)
return charIndex;
return -1;
}
}