package statalign.base;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;
import org.apache.commons.math3.util.FastMath;
import statalign.ui.ErrorMessage;
/**
*
* This class contains multi-purpose static functions.
*
* @author miklos, novak, herman
*
*/
public class Utils{
private Utils(){
//... so that no objects of this class can be created
}
/**
* Debugging mode (various consistency checks done if on)
*/
public static boolean DEBUG = false;
// TODO Change this to an integer, so we can have different levels of
// debug information printed.
/**
* If this is set to <code>true</code> then the alignment moves operate
* on the whole alignment rather than selecting subwindows. This
* is usually much slower.
*/
public static boolean USE_FULL_WINDOWS = false;
/**
* If true, then ModelExtensions are allowed to offer a contribution
* to the emission probability used to compute the dynamic programming
* matrices for alignment proposals.
* NB this will be switched on automatically when a suitable
* ModelExtension is activated. Setting to true here will render this
* variable constitutively active, which is unlikely to be useful.
*/
public static boolean USE_MODEXT_EM = false;
/**
* If true, then ModelExtensions are allowed to offer an upper contribution
* to the emission probability used to compute the dynamic programming
* matrices for alignment proposals. The <i>upper</i> contribution involves
* information about all vertices outside of the current subtree.
* NB this will be switched on automatically when when a suitable
* ModelExtension is activated. Setting to true here will render this
* variable constitutively active, which is unlikely to be useful.
*/
public static boolean USE_MODEXT_UPP = false;
/**
* Whether to use information from the upper parts of the
* tree in order to fill out the <code>hmm2</code> and <code>hmm3</code>
* matrices.
*/
public static boolean USE_UPPER = false;
/** Power determining how much we favour realigning
* the larger subtree first when doing a nearest-neighbour
* interchange move.
*/
public static double LEAF_COUNT_POW = 1.0;
/**
* The random number generator used throughout the program.
* A new generator is constructed at each MCMC run using the seed in the
* corresponding MCMCPars object.
*/
public static RandomGenerator generator = new Well19937c(1);
/**
* @param x
* @param shape
* @param rate
* @return The unnormalised log density of Gamma(x | shape, rate)
*/
public static double logGammaDensity(double x, double shape, double rate) {
if (x < 0) {
return Double.NEGATIVE_INFINITY;
}
return (shape-1) * FastMath.log(x) - rate * x;
}
/**
* @param x
* @param alpha
* @param beta
* @return The unnormalised log density of Beta(x | alpha, beta)
*/
public static double logBetaDensity(double x, double alpha, double beta) {
if (x < 0 || x > 1) {
return Double.NEGATIVE_INFINITY;
}
return (alpha-1) * FastMath.log(x) + (beta-1) * FastMath.log(1-x);
}
/**
* During the burnin, the proposalWidthControlVariable for all continuous parameters
* is adjusted in order to ensure that the average acceptance rate is between
* MIN_ACCEPTANCE and MAX_ACCEPTANCE where possible.
* This is done by repeatedly multiplying the proposalWidthControlVariable
* by SPAN_MULTIPLIER until the acceptance falls within the desired range.
*/
public static final double SPAN_MULTIPLIER = 0.7;
/**
* During the burnin, the proposalWidthControlVariable for all McmcMove objects
* is adjusted (if <code>McmcMove.autoTune=true</code>) in order to ensure that the average
* acceptance rate is between MIN_ACCEPTANCE and MAX_ACCEPTANCE where possible.
* This is done by repeatedly multiplying the proposalWidthControlVariable
* by SPAN_MULTIPLIER until the acceptance falls within the desired range.
*/
public static final double MIN_ACCEPTANCE = 0.2;
// Put the minimum a bit higher than we want it to be
// because as the parameters converge the acceptance rate
// typically goes down
/**
* During the burnin, the proposalWidthControlVariable for all continuous parameters
* is adjusted in order to ensure that the average acceptance rate is between
* MIN_ACCEPTANCE and MAX_ACCEPTANCE where possible.
* This is done by repeatedly multiplying the proposalWidthControlVariable
* by SPAN_MULTIPLIER until the acceptance falls within the desired range.
*/
public static final double MAX_ACCEPTANCE = 0.4;
/**
* Initial value for the alignment proposal window length multiplier.
*/
public static double WINDOW_MULTIPLIER = 1.0;
/**
* Number of samples during burnin used to get a rough estimate
* of the current acceptance rate, for the purposes of tuning the
* proposal variance control parameters.
*/
public static final double MIN_SAMPLES_FOR_ACC_ESTIMATE = 10;
/**
* log(0) is set to Double.NEGATIVE_INFINITY. This is used in logarithmic adding.
* The logarithm of an empty sum is set to this value.
*
*/
public static final double log0 = Double.NEGATIVE_INFINITY;
public static final double MIN_EDGE_LENGTH = 0.01;
/** Minimum length for internal node sequence. */
public static final int MIN_SEQ_LENGTH = 1;
/** If true then we downweight the indel contribution to the overall likelihood. */
public static final boolean DOWNWEIGHT_INDEL_LIKELIHOOD = false;
/**
* If true then we divide out the stationary probability of the
* internal nodes from the indel likelihood, as per Redelings
* and Suchard (2005), using the TKF92 stationary distribution
* defined in Thorne et al. (1992).
*/
public static final boolean USE_INDEL_CORRECTION_FACTOR = true;
public static final int LOW_COUNT_THRESHOLD = 10;
public static final double LOW_COUNT_MULTIPLIER = 0.999;
/**
* If <code>true</code> then during the first half of the burnin
* if a particular McmcMove has been below its minimum acceptance
* rate for at least (LOW_COUNT_THRESHOLD * MIN_SAMPLES_FOR_ACC_ESTIMATE)
* iterations, then for the purposes of computing the acceptance
* ratio, we multiply the new log likelihood by LOW_COUNT_MULTIPLIER
* raised to a power that increases with the number of iterations
* beyond the threshold. This gradually favours the state jumping,
* which may be useful to avoid getting stuck in local modes during the
* burnin. Ideally such a scheme should not be needed, however.
*/
public static final boolean SHAKE_IF_STUCK = true;
public static final int MAX_SILENT_LENGTH = 10;
//public static final double SILENT_INSERT_PROB = 0.05;
public static final double SILENT_INSERT_PROB = 0.5;
private static double[] tempDoubleArray;
public static boolean VERBOSE = false;
/**
* This function selects a random integer with expected value given by expectedLength.
* The probability of the selection of that particular index is returned in selectLike.
*
* (MuDouble is used to allow for another return value, in C++ a double pointer/reference
* could be used instead)
*
* @param length The length of the array we need.
* @param selectLike A mutable double object to return the selection probability
* @param expectedLength The expected window length.
* @return A random integer as described above
*/
public static int linearizerWeight(int length, MuDouble selectLike, double expectedLength){
if(tempDoubleArray == null || tempDoubleArray.length < length){
tempDoubleArray = new double[length];
}
double p = 1.0/(1+expectedLength);
double q = 1.0 - p;
tempDoubleArray[1] = p;
double sum = tempDoubleArray[1];
for(int i = 2; i < length; i++){
tempDoubleArray[i] = tempDoubleArray[i-1] * q;
sum += tempDoubleArray[i];
}
double w = generator.nextDouble() * sum;
int k = 1;
double x = tempDoubleArray[1];
while(x < w && k < length - 1){
k++;
x += tempDoubleArray[k];
}
// System.out.println((tempDoubleArray[k]/sum));
selectLike.value = (tempDoubleArray[k]/sum);
return k;
}
/**
*
* This function returns the probability of choosing a particular index with linearizerWeight.
* The value returned is equal to mu.value when linearizerWeight(length, mu) returns 'index'.
*
* @param length Distribution parameter as in linearizerWeight
* @param index Selected index
* @return Probability of the selection
*/
public static double linearizerWeightProb(int length, int index, double expectedLength){
if(tempDoubleArray == null || tempDoubleArray.length < length){
tempDoubleArray = new double[length];
}
double p = 1.0/(1+expectedLength);
double q = 1.0 - p;
tempDoubleArray[1] = p;
double sum = tempDoubleArray[1];
for(int i = 2; i < length; i++){
tempDoubleArray[i] = tempDoubleArray[i-1] * q;
sum += tempDoubleArray[i];
}
return tempDoubleArray[index]/sum;
}
/**
* This function returns a random index, weighted by the weights in the array `weights'
*/
public static int weightedChoose(int[] weights){
int sum = 0;
for(int i = 0; i < weights.length; i++){
sum += weights[i];
}
int w = generator.nextInt(sum);
int k = 0;
sum = 0;
while((sum += weights[k]) <= w){
k++;
}
return k;
}
/**
* This function returns a random index, weighted by the weights in the array `weights'
*/
public static int weightedChoose(List<Integer> weights){
int sum = 0;
for(int i = 0; i < weights.size(); i++){
sum += weights.get(i);
}
int w = generator.nextInt(sum);
int k = 0;
sum = 0;
while((sum += weights.get(k)) <= w){
k++;
}
return k;
}
/**
* Similar to weightedChoose(weights), but the log-probability of the selection
* will be subtracted from the mutable double object selectLogLike
* (reason: proposal is in the denominator of acceptance ratio)
*
* (MuDouble is used to allow for another return value, in C++ a double pointer/reference
* could be used instead)
*
*/
public static int weightedChoose(double[] weights, MuDouble selectLogLike){
double sum = 0.0;
for(int i = 0; i < weights.length; i++){
sum += weights[i];
}
if(selectLogLike != null)
selectLogLike.value += Math.log(sum);
double w = generator.nextDouble() * sum;
int k = 0;
sum = 0.0;
while(k < weights.length-1 && (sum += weights[k]) <= w){
k++;
}
if(selectLogLike != null)
selectLogLike.value -= Math.log(weights[k]);
assert (weights[k] > 1e-5) : "weightedChoose error";
return k;
}
public static int weightedChoose(double[] weights){
return weightedChoose(weights,null);
}
public static int weightedChoose(List<Double> weights, MuDouble selectLogLike){
double sum = 0.0;
for(int i = 0; i < weights.size(); i++){
sum += weights.get(i);
}
if(selectLogLike != null)
selectLogLike.value += Math.log(sum);
double w = generator.nextDouble() * sum;
int k = 0;
sum = 0.0;
while(k < weights.size()-1 && (sum += weights.get(k)) <= w){
k++;
}
if(selectLogLike != null)
selectLogLike.value -= Math.log(weights.get(k));
assert (weights.get(k) > 1e-5) : "weightedChoose error";
return k;
}
// public static int weightedChoose(List<Double> weights){
// return weightedChoose(weights,null);
// }
/**
* Behaves exactly like weightedChoose(new double[]{1-prob,prob}, selectLogLike), but faster
*/
public static int chooseOne(double prob, MuDouble selectLogLike) {
if(generator.nextDouble() < prob) {
selectLogLike.value -= Math.log(prob);
return 1;
}
selectLogLike.value -= Math.log(1.0-prob);
return 0;
}
/**
* Equivalent to weightedChoose(weights, selectLogLike) where
* logWeights[i] = Math.log(weights[i]), but avoids overflows that might result from
* exponentiation.
* (MuDouble is used to allow for another return value, in C++ a double pointer/reference
* could be used instead)
*/
public static int logWeightedChoose(double[] logWeights, MuDouble selectLogLike){
double logSum = log0;
for(int i = 0; i < logWeights.length; i++){
logSum = logAdd(logSum, logWeights[i]);
}
if(selectLogLike != null)
selectLogLike.value += logSum;
double w = Math.log(generator.nextDouble()) + logSum;
int k = 0;
logSum = log0;
while(k < logWeights.length-1 && (logSum = logAdd(logSum, logWeights[k])) <= w){
k++;
}
if(selectLogLike != null)
selectLogLike.value -= logWeights[k];
assert (logWeights[k] > log0) : "logWeightedChoose error";
return k;
}
public static int logWeightedChoose(double[] logWeights){
return logWeightedChoose(logWeights,null);
}
/**
* For a tree of the form:
* <pre>
* gg
* /
* g
* / \
* p u
* / \
* t b
* </pre>
*
* this function determines valid possible indel states for <code>p</code> and <code>g</code>
* given fixed states for the neighbouring nodes.
* @param p The presence/absence of node <code>p</code>.
* @param g The presence/absence of node <code>b</code>.
* @param neighb An array indicating the state of the neighbouring nodes, in the order
* <code>{t,b,u,gg}</code>.
*
* @return A boolean value indicating whether the specified values of <code>p</code>
* and <code>b</code> are compatible with the neighbouring states.
*/
public static boolean isValidHistory(boolean p, boolean g, boolean[] neighb) {
return isValidHistory(p,g,neighb,false);
}
/**
* For a tree of the form:
* <pre>
* gg
* /
* g
* / \
* p u
* / \
* t b
* </pre>
*
* or, if <code>gIsRoot = true</code>, then for a tree of the form
* <pre>
* g
* / \
* p u
* / \
* t b
* </pre>
*
* this function determines valid possible indel states for <code>p</code> and <code>g</code>
* given fixed states for the neighbouring nodes.
* @param gIsRoot This is <code>true</code> if <code>g</code> is the root of the tree.
* @param p The presence/absence of node <code>p</code>.
* @param g The presence/absence of node <code>b</code>.
* @param neighb An array indicating the state of the neighbouring nodes, in the order
* <code>{t,b,u,gg}</code> (if <code>gIsRoot=false</code>), or <code>{t,b,u}</code>
* (if <code>gIsRoot=true</code>).
*
* @return A boolean value indicating whether the specified values of <code>p</code>
* and <code>b</code> are compatible with the neighbouring states.
*/
public static boolean isValidHistory(boolean p, boolean g, boolean[] neighb, boolean gIsRoot) {
boolean t = neighb[0], b = neighb[1], u = neighb[2], gg = false;
if (!gIsRoot) gg = neighb[3];
boolean result = true;
if ((u|gg)&(t|b)) result &= (p&g);
if (result && u&gg) result &= g;
if (result && t&b) result &= p;
if (result && (p&!g)) result &= !(u|gg);
if (result && (g&!p)) result &= !(t|b);
return result;
}
static boolean isValidHistory(Vertex.Neighbours n) {
boolean result = true;
if ((n.ux|n.ggx)&(n.tx|n.bx)) result &= (n.px&n.gx);
if (result && n.ux&n.ggx) result &= n.gx;
if (result && n.tx&n.bx) result &= n.px;
if (result && (n.px&!n.gx)) result &= !(n.ux|n.ggx);
if (result && (n.gx&!n.px)) result &= !(n.tx|n.bx);
return result;
}
/**
* Determines whether a particular bit is set in the binary representation
* of an integer.
* @param x The integer to be queried.
* @param pos The bit to be queried.
* @return <code>true</code> if the bit as position <code>pos</code> is set.
*/
static boolean bitIsSet(int x, int pos) {
return (x & (1<<pos))!=0;
}
/**
* Determines whether a particular indel history is valid given the
* rule that a character cannot be inserted more than once in a particular
* column (which is a definition of homology).
* @param neighb An integer representation of the presence/absence of the
* six nodes involved in a nearest-neighbour interchange.
* @return <code>true</code> if the neighbourhood structure is valid
*/
static boolean isValidHistory(int neighb) {
boolean tx = bitIsSet(neighb,0);
boolean bx = bitIsSet(neighb,1);
boolean ux = bitIsSet(neighb,2);
boolean ggx = bitIsSet(neighb,3);
boolean px = bitIsSet(neighb,4);
boolean gx = bitIsSet(neighb,5);
boolean result = true;
if ((ux|ggx)&(tx|bx)) result &= (px&gx);
if (result && ux&ggx) result &= gx;
if (result && tx&bx) result &= px;
if (result && (px&!gx)) result &= !(ux|ggx);
if (result && (gx&!px)) result &= !(tx|bx);
return result;
}
/**
* Takes a time in milliseconds and converts to a string to be printed.
*
* @param x The time to be formatted, in milliseconds (as a long).
* @return A string to be printed.
*/
public static String convertTime(long x) {
x /= 1000;
return String.format("%dh%02dm%02ds", x / 3600,
(x / 60) % 60, x % 60);
}
/**
* Logarithmically add two numbers
* @param a log(x)
* @param b log(y)
* @return log(x+y)
*/
public static double logAdd(double a, double b) {
if(a == b)
return Math.log(2)+a;
if(a < b)
return b+Math.log(FastMath.exp(a-b)+1);
return a+Math.log(FastMath.exp(b-a)+1);
}
/**
* NB this function only overwrites <code>res</code> if fel1 != null, otherwise it multiplies
* the existing elements.
* @param res
* @param fel1
* @param prob1
* @param fel2
* @param prob2
*/
static void calcFelsen(double[] res, double[] fel1, double[][] prob1, double[] fel2, double[][] prob2) {
double s;
int i, j, len = res.length;
for(i = 0; i < len; i++) {
if(fel1 != null) {
s = 0.0;
for(j = 0; j < len; j++)
s += prob1[i][j]*fel1[j];
res[i] = s;
}
else{
res[i] = 1.0;
}
if(fel2 != null) {
s = 0.0;
for(j = 0; j < len; j++){
s += prob2[i][j]*fel2[j];
}
res[i] *= s;
}
}
}
static boolean calcFelsenWithCheck(double[] res, double[] fel1, double[][] prob1, double[] fel2, double[][] prob2) {
double s;
int i, j, len = res.length;
double[] oldres = new double[res.length];
for(i = 0; i < res.length; i++){
oldres[i] = res[i];
}
for(i = 0; i < len; i++) {
if(fel1 != null) {
s = 0.0;
for(j = 0; j < len; j++)
s += prob1[i][j]*fel1[j];
res[i] = s;
}
else{
res[i] = 1.0;
}
if(fel2 != null) {
s = 0.0;
for(j = 0; j < len; j++){
s += prob2[i][j]*fel2[j];
}
res[i] *= s;
}
}
boolean match = true;
for(i = 0; i < res.length && match; i++){
match = Math.abs(oldres[i]/res[i] - 1.0) < 0.0001;
}
return match;
}
/**
* Calculates emission probability from Felsenstein likelihoods
*/
public static double calcEmProb(double fel[], double aaEquDist[]) {
double p = 0;
for(int i = 0; i < fel.length; i++)
p += fel[i]*aaEquDist[i];
return p;
}
public static String repeatedString(String s, int n) {
return new String(new char[n]).replace("\0", s);
}
/**
* Makes Enumeration iterable.
*
* @param <T> Enumeration element type
* @param en the Enumeration
* @return an Iterable that can iterate through the elements of the Enumeration
*/
public static <T> Iterable<T> iterate(final Enumeration<T> en) {
final Iterator<T> iterator = new Iterator<T>() {
@Override
public boolean hasNext() {
return en.hasMoreElements();
}
@Override
public T next() {
return en.nextElement();
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
return new Iterable<T>() {
@Override
public Iterator<T> iterator() {
return iterator;
}
};
}
/**
* Joins strings using a separator string. Accepts any <code>Object</code>s
* converting them to strings using their <code>toString</code> method.
*
* @param strs
* strings to join
* @param separator
* the separator string
* @return a string made up of the strings separated by the separator
*/
public static String joinStrings(Object[] strs, String separator) {
StringBuilder result = new StringBuilder();
int i, l = strs.length;
if (l > 0) {
result.append(strs[0]);
}
for (i = 1; i < l; i++) {
result.append(separator);
result.append(strs[i]);
}
return result.toString();
}
/**
* Joins strings using a prefix and a separator string. Accepts any
* <code>Object</code>s converting them to strings using their
* <code>toString</code> method.
*
* @param strs
* strings to join
* @param prefix
* prefix for each string
* @param separator
* the separator string
* @return a string made up of the strings with the given prefix and
* separated by the separator
*/
public static String joinStrings(Object[] strs, String prefix, String separator) {
StringBuilder result = new StringBuilder();
int i, l = strs.length;
if (l > 0) {
result.append(prefix);
result.append(strs[0]);
}
for (i = 1; i < l; i++) {
result.append(separator);
result.append(prefix);
result.append(strs[i]);
}
return result.toString();
}
/**
* Finds all classes in a given package and all of its subpackages by walking
* through class path. Handles both directories and jar files.
*
* @param packageName the package in which the classes are searched for
* @return array of found class names (with full package prefixes)
*/
public static List<String> classesInPackage(String packageName) {
ArrayList<String> classNames = new ArrayList<String>();
String packageDir = packageName.replace('.', '/');
CircularArray<File> dirs = new CircularArray<File>();
CircularArray<String> pkgPrefs = new CircularArray<String>();
ClassLoader.getSystemClassLoader();
// ErrorMessage.showPane(null, System.getProperty("java.class.path"), false);
// TODO: FIX the classpath stuff.
// String[] paths = new String[]{ System.getProperty("user.dir") + "/bin" };
String[] paths = System.getProperty("java.class.path").split(File.pathSeparator);
for(String path : paths) {
try {
File file = new File(path);
if(!file.isDirectory()) {
JarFile jarFile = new JarFile(file);
Enumeration<JarEntry> fileNames = jarFile.entries();
while(fileNames.hasMoreElements()) {
String entryName = fileNames.nextElement().getName();
if(entryName.startsWith(packageDir+"/") && entryName.endsWith(".class")) {
classNames.add(entryName.substring(0, entryName.length()-6).replace('/', '.'));
}
}
} else {
dirs.push(new File(path+"/"+packageDir));
pkgPrefs.push(packageName);
while((file = dirs.shift()) != null) {
File[] children = file.listFiles();
String pkgPrefix = pkgPrefs.shift()+".";
if(children != null) {
for(File child : children) {
String chName = child.getName();
if(child.isDirectory()) {
dirs.push(child);
pkgPrefs.push(pkgPrefix+chName);
} else if(chName.endsWith(".class")) {
classNames.add(pkgPrefix+chName.substring(0, chName.length()-6));
}
}
} // if children
} // for dirs
} // if !cpFile.isDir
} catch(IOException e) {
ErrorMessage.showPane(null, e, true);
}
} // for classpath
return classNames;
}
/**
* Locates all plugins that are descendants of the specified plugin superclass. The plugins are
* expected to be in the package <code><i>root</i>.plugins</code> where <code><i>root</i></code> refers
* to the package of the superclass.
* @param superClass the ancestral plugin class
* @return list of plugins found
*/
@SuppressWarnings("unchecked")
public static<T> List<T> findPlugins(Class<T> superClass) {
List<String> pluginNames = Utils.classesInPackage(superClass.getPackage().getName()+".plugins");
List<T> pluginList = new ArrayList<T>();
for(int i = 0; i < pluginNames.size(); i++) {
try {
Class<?> cl = Class.forName(pluginNames.get(i));
if(!superClass.isAssignableFrom(cl))
continue;
pluginList.add((T)cl.newInstance());
} catch (Exception e) {
}
}
return pluginList;
}
/**
* Transforms an alignment into the prescribed format
*
* @param s String array containing the alignment in StatAlign format
* @param type The name of the format, might be "StatAlign", "Clustal", "Fasta", "Phylip", "Nexus"
* @param input The input data. Needed for the Nexus format that needs
* a name of the alignment (set to input.title) the type of the alignment (either
* nucleotide or protein, read from input.model) and the list of characters
* in the substitution model (also read from input.model).
* @return String array containing the alignment in the prescribed format
*/
public static String[] alignmentTransformation(String[] s, String[] names, String type, InputData input){
if(type.equals("StatAlign")){
String[] returnString = new String[s.length];
for (int i=0; i<s.length; i++) {
returnString[i] = names[i]+"\t"+s[i];
}
return returnString;
}
String[] returnAlignment = null;
//here we need different alignment types
//String[] names = new String[s.length];
String[] alignment = new String[s.length];
int leavesNumber = 0;
for(int i = 0; i < names.length; i++){
//names[i] = s[i].substring(0,s[i].indexOf('\t'));
if(names[i].charAt(0) != ' '){
leavesNumber++;
}
//alignment[i] = s[i].substring(s[i].indexOf('\t')+1,s[i].length());
alignment[i] = s[i];
}
String[] onlyLeavesAlignment = new String[leavesNumber];
String[] newNames = new String[leavesNumber];
leavesNumber = 0;
for(int i = 0; i < names.length; i++){
if(names[i].charAt(0) != ' '){
onlyLeavesAlignment[leavesNumber] = s[i];//.substring(s[i].indexOf('\t')+1);
newNames[leavesNumber] = names[i];
newNames[leavesNumber] = newNames[leavesNumber].replaceAll(" ", "");
// newNames[leavesNumber] = newNames[leavesNumber].replaceAll("_", " ");
newNames[leavesNumber] = newNames[leavesNumber].replaceAll("\\{", "(");
newNames[leavesNumber] = newNames[leavesNumber].replaceAll("\\}", ")");
leavesNumber++;
}
}
onlyLeavesAlignment = deleteAllGaps(onlyLeavesAlignment);
if(type.equals("Clustal")){
for(int i = 0; i < newNames.length; i++){
newNames[i] = newNames[i].replaceAll("\\(", "_");
newNames[i] = newNames[i].replaceAll("\\)", "_");
if(newNames[i].indexOf(' ') == -1){
newNames[i] = newNames[i].substring(0,Math.min(newNames[i].length(),30));
while(newNames[i].length() < 30){
newNames[i] += " ";
}
}
else{
newNames[i] = newNames[i].substring(0,Math.min(30, newNames[i].indexOf(' ')));
while(newNames[i].length() < 30){
newNames[i] += " ";
}
}
}
// System.out.println("Making Fasta alignment");
returnAlignment = new String[(onlyLeavesAlignment.length + 3) *(onlyLeavesAlignment[0].length()/60 + (onlyLeavesAlignment[0].length() % 60 == 0 ? 0 : 1)) + 1];
// System.out.println("only leaves number: "+onlyLeavesAlignment.length+
// "alignment length: "+onlyLeavesAlignment[0].length()+
// "new array length: "+returnAlignment.length);
int j = 1;
returnAlignment[0] = "CLUSTAL X type multiple sequence alignment generated by StatAlign";
for(int i = 0; i < onlyLeavesAlignment[0].length()/60; i++){
returnAlignment[j] = "";
returnAlignment[j+1] = "";
j+=2;
//System.out.println(returnAlignment[j]);
for(int k = 0; k < newNames.length; k++){
returnAlignment[j] = newNames[k]+"\t"+onlyLeavesAlignment[k].substring(i*60, (i+1) * 60);
//System.out.println(returnAlignment[j]);
j++;
}
returnAlignment[j] = " \t";
for(int k = i*60; k < (i+1) * 60; k++){
returnAlignment[j] += clustalCharacter(onlyLeavesAlignment,k);
}
j++;
}
//last line, if needed
if(onlyLeavesAlignment[0].length() % 60 != 0){
returnAlignment[j] = "";
returnAlignment[j+1] = "";
j+=2;
//System.out.println(returnAlignment[j]);
for(int k = 0; k < newNames.length; k++){
returnAlignment[j] = newNames[k]+"\t"+onlyLeavesAlignment[k].substring((onlyLeavesAlignment[k].length()/60)*60);
//System.out.println(returnAlignment[j]);
j++;
}
returnAlignment[j] = " \t";
for(int k = (onlyLeavesAlignment[0].length()/60)*60; k < onlyLeavesAlignment[0].length(); k++){
returnAlignment[j] += clustalCharacter(onlyLeavesAlignment,k);
}
}
}
else if(type.equals("Fasta")){
// System.out.println("Making Fasta alignment");
returnAlignment = new String[onlyLeavesAlignment.length *(onlyLeavesAlignment[0].length()/60 + (onlyLeavesAlignment[0].length() % 60 == 0 ? 1 : 2))];
// System.out.println("only leaves number: "+onlyLeavesAlignment.length+
// "alignment length: "+onlyLeavesAlignment[0].length()+
// "new array length: "+returnAlignment.length);
int j = 0;
for(int i = 0; i < newNames.length; i++){
returnAlignment[j] = ">"+newNames[i];
//System.out.println(returnAlignment[j]);
j++;
for(int k = 0; k < onlyLeavesAlignment[i].length()/60; k++){
returnAlignment[j] = onlyLeavesAlignment[i].substring(k*60, (k+1) * 60);
//System.out.println(returnAlignment[j]);
j++;
}
if(onlyLeavesAlignment[0].length() % 60 != 0){
returnAlignment[j] = onlyLeavesAlignment[i].substring((onlyLeavesAlignment[i].length()/60)*60);
//System.out.println(returnAlignment[j]);
j++;
}
}
}
else if(type.equals("Phylip")){
returnAlignment = new String[(onlyLeavesAlignment.length + 1)*(onlyLeavesAlignment[0].length()/60 + (onlyLeavesAlignment[0].length() % 60 == 0 ? 0 : 1))];
returnAlignment[0] = " "+onlyLeavesAlignment.length+" "+onlyLeavesAlignment[0].length();
for(int i = 0; i < newNames.length; i++){
newNames[i] = newNames[i].replaceAll("\\(", "_");
newNames[i] = newNames[i].replaceAll("\\)", "_");
newNames[i] = newNames[i].substring(0,Math.min(newNames[i].length(),10));
while(newNames[i].length() < 13){
newNames[i] += " ";
}
}
int j = 1;
for(int k = 0; k < newNames.length; k++){
returnAlignment[j] = newNames[k];
for(int l = 0; l < 6; l++){
returnAlignment[j] += (onlyLeavesAlignment[k].length() >= (l+1)*10 ?
onlyLeavesAlignment[k].substring(l*10,(l+1)*10)+" " :
"");
}
j++;
}
for(int i = 1; i < onlyLeavesAlignment[0].length()/60; i++){
returnAlignment[j] = "";
j++;
for(int k = 0; k < newNames.length; k++){
returnAlignment[j] = " ";
for(int l = 0; l < 6; l++){
returnAlignment[j] += onlyLeavesAlignment[k].substring(i*60+l*10,i*60+(l+1)*10)+" ";
}
j++;
}
}
//last line, if needed
if(onlyLeavesAlignment[0].length() % 60 != 0 && onlyLeavesAlignment[0].length() > 60){
returnAlignment[j] = "";
j++;
for(int k = 0; k < newNames.length; k++){
returnAlignment[j] = " ";
int l;
for(l = 0; l < (onlyLeavesAlignment[0].length() - (onlyLeavesAlignment[0].length()/60)*60)/10; l++){
returnAlignment[j] += onlyLeavesAlignment[k].substring((onlyLeavesAlignment[0].length()/60)*60+l*10,
(onlyLeavesAlignment[0].length()/60)*60+(l+1)*10)+" ";
}
//and the last piece...
returnAlignment[j] += onlyLeavesAlignment[k].substring((onlyLeavesAlignment[0].length()/60)*60+l*10);
j++;
}
}
}
else if(type.equals("Nexus")){
returnAlignment = new String[(onlyLeavesAlignment.length + 1)*((onlyLeavesAlignment[0].length()/50) + (onlyLeavesAlignment[0].length() % 50 == 0 ? 0 : 1)) + 10];
returnAlignment[0] = "#NEXUS";
returnAlignment[1] = "[TITLE: "+input.title+"]";
returnAlignment[2] = "";
returnAlignment[3] = "begin data;";
returnAlignment[4] = "dimensions ntax="+onlyLeavesAlignment.length+" nchar="+onlyLeavesAlignment[0].length();
returnAlignment[5] = "format interleave datatype="+input.model.getType()+" gap=- symbols=\"";
for(int i = 0; i < input.model.alphabet.length; i++){
returnAlignment[5] += input.model.alphabet[i];
}
returnAlignment[5]+="\";";
returnAlignment[6] = "";
returnAlignment[7] = "matrix";
for(int i = 0; i < newNames.length; i++){
newNames[i] = newNames[i].replaceAll("\\(", "_");
newNames[i] = newNames[i].replaceAll("\\)", "_");
newNames[i] = newNames[i].substring(0,Math.min(newNames[i].length(),24));
while(newNames[i].length() < 25){
newNames[i] += " ";
}
}
int j = 8;
for(int i = 0; i < onlyLeavesAlignment[0].length()/50; i++){
//System.out.println(returnAlignment[j]);
for(int k = 0; k < newNames.length; k++){
returnAlignment[j] = newNames[k]+onlyLeavesAlignment[k].substring(i*50, (i+1) * 50);
//System.out.println(returnAlignment[j]);
j++;
}
returnAlignment[j] = "";
j++;
}
if(onlyLeavesAlignment[0].length() % 50 != 0){
for(int k = 0; k < newNames.length; k++){
returnAlignment[j] = newNames[k]+onlyLeavesAlignment[k].substring((onlyLeavesAlignment[k].length()/50)*50);
//System.out.println(returnAlignment[j]);
j++;
}
}
returnAlignment[j] = ";";
returnAlignment[j+1] = "end;";
}
return returnAlignment;
}
private static char clustalCharacter(String[] alignment, int pos){
char c = '*';
for(int i = 0; i < alignment.length - 1; i++){
if(alignment[i].charAt(pos) != alignment[i+1].charAt(pos)){
c = ' ';
break;
}
}
return c;
}
private static String[] deleteAllGaps(String[] alignment){
String[] gapFreeAlignment = new String[alignment.length];
for(int i = 0; i < gapFreeAlignment.length; i++){
gapFreeAlignment[i] = "";
}
for(int i = 0; i < alignment[0].length(); i++){
if(!allSpace(alignment,i)){
for(int j = 0; j < alignment.length; j++){
gapFreeAlignment[j] += alignment[j].charAt(i);
}
}
}
return gapFreeAlignment;
}
static boolean allSpace(String[] s, int p){
boolean b = true;
for(int i = 0; i < s.length && b; i++){
b = s[i].charAt(p) == ' ';
}
return b;
}
public static char[] copyOf(char[] array) {
int len = array.length;
char[] copy = new char[len];
System.arraycopy(array, 0, copy, 0, len);
return copy;
}
public static int[] copyOf(int[] array) {
int len = array.length;
int[] copy = new int[len];
System.arraycopy(array, 0, copy, 0, len);
return copy;
}
public static double[] copyOf(double[] array) {
int len = array.length;
double[] copy = new double[len];
System.arraycopy(array, 0, copy, 0, len);
return copy;
}
public static int minMax(int value, int min, int max) {
if(value < min)
value = min;
else if(value > max)
value = max;
return value;
}
/**
* Merely for testing purposes....
*
* @param args Arguments are not used.
*/
// public static void main(String[] args){
// for(String s : Utils.classesInPackage(Dayhoff.class.getPackage().getName()))
// System.out.println(s);
//// double[] w = new double[] {1,0,3,4,5};
//// for(int i = 0; i < 10000; i++){
//// MuDouble x = new MuDouble();
//// int index = linearizerWeight(1000,x);
//// System.out.println("I chose index "+index+" probability: "+x.value+" backprobability: "+
//// linearizerWeightProb(1000,index));
//// }
//// for(int i = 0; i < 1000; i++){
//// Double x = new Double(0.0);
////
//// }
// }
}
/**
* Mutable Double class.
*
* A MuDouble object can be used in place of a double pointer/reference that is missing
* from the Java language.
*/
class MuDouble {
public double value;
public MuDouble() {
}
public MuDouble(double value) {
this.value = value;
}
@Override
public String toString() {
return Double.toString(value);
}
}