package statalign.base;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import statalign.base.hmm.Hmm2;
import statalign.base.hmm.HmmNonParam;
import statalign.base.hmm.HmmSilent;
import statalign.base.hmm.HmmTkf92;
import statalign.base.thread.Stoppable;
import statalign.io.DataType;
import statalign.io.RawSequences;
import statalign.model.score.SubstitutionScore;
import statalign.model.subst.SubstitutionModel;
import statalign.model.subst.plugins.Dayhoff;
import statalign.postprocess.plugins.contree.CNetwork;
/**
* This is the current tree in the MCMC run.
* It extends Stoppable, so the MCMC run can be terminated in graphical interface mode
* while it calls a function in the Tree class.
* @author miklos, novak
*/
public class Tree extends Stoppable implements DataType {
/** Characters on this tree undergo substitutions according to this model. */
public SubstitutionModel substitutionModel;
/**
* When two fragments of two sequences on two neighbor vertices of the tree
* are realigned, the proposed new alignment is drawn from a forward-backward
* sampling of this HMM. It is a simplified version of the TKF92 model.
*/
public Hmm2 hmm2;
/**
* When two fragments of two sequences on two sibling vertices are realigned
* drawing a novel ancestral substring for their parent vertex, the proposed new
* alignment is drawn from a forward-backward sampling of this HMM. It is
* a pair-HMM having states emitting into the non-observable ancestral sequence.
*/
public HmmSilent hmm3;
/** The array of vertices of the tree. */
public Vertex vertex[];
/** The root of the tree */
public Vertex root;
/** The name of the tree */
public String title;
/** The name of the sequences */
public String[] names;
/** The heat parameter for this MCMC chain. */
public double heat = 1.0d;
public CNetwork network;
//boolean changingTree = false;
boolean changingTree = true;
public String[] previousFullAlign;
Mcmc owner;
public Tree() {
}
public RawSequences getSeqs() { return null; }
public void setSeqs(RawSequences rs) {}
/**
* Constructor to read a tree and an alignment from an output file.
* Not used in the current version. It aims at letting the possibility to
* get an input tree and an alignment as the starting state of the MCMC.
*/
// TODO: before using check if leaf vertices are created first!
Tree(File f, SubstitutionScore ss) {
try {
substitutionModel = new Dayhoff();
hmm2 = new HmmTkf92(null);
hmm3 = new HmmNonParam();
BufferedReader bf = new BufferedReader(new FileReader(f));
String s = bf.readLine().replaceFirst("\\w+\t", "");
String[] s1 = s.split("\t");
hmm2.params[0] = Double.parseDouble(s1[3]);
hmm2.params[1] = Double.parseDouble(s1[5]);
hmm2.params[2] = Double.parseDouble(s1[7]);
s = bf.readLine().replaceFirst("\\w+\t", "");
Tree t = new Tree(s);
bf.readLine(); // skip `Alignment' header
root = t.root;
vertex = t.vertex;
for (int i = 0; i < vertex.length; i++) {
vertex[i].owner = this;
}
for (int i = 0; i < vertex.length; i++) {
vertex[i].edgeChangeUpdate();
}
// reading the alignment
String[] alignment = new String[vertex.length];
for (int i = 0; i < alignment.length; i++) {
alignment[i] = bf.readLine().replaceFirst("\\w+\t", "");
}
Vertex actual = root;
actual.selected = false;
// System.out.println("alignment length: "+alignment.length+" Tree: "+printedTree());
for (int i = 0; i < alignment.length; i++) {
String temp = alignment[i].substring(31, alignment[i].length());
// System.out.println(i+" "+temp);
AlignColumn column = new AlignColumn(actual);
column.seq = new double[substitutionModel.e.length];
column.upp = new double[substitutionModel.e.length];
actual.first = column;
actual.length = 0;
for (int j = 0; j < temp.length(); j++) {
if (temp.charAt(j) != '-') {
// System.out.println("character in position: "+temp.charAt(j));
//column.seq[ss.which[temp.charAt(j)]] = 1.0;
for (int k = 0; k < column.seq.length; k++) {
column.seq[k] = ss.which[temp.charAt(j)][k];
}
AlignColumn next = new AlignColumn(actual);
next.seq = new double[substitutionModel.e.length];
column.next = next;
next.prev = column;
column = next;
actual.length++;
}
}
/*
AlignColumn next = new AlignColumn(actual);
next.seq = new double[substitutionModel.e.length];
column.next = next;
next.prev = column;
column = next;
actual.length++;
*/
actual.last = column;
// here we use this integer variable for remembering to the index in the alignment...
// pretty messy :)
actual.leafCount = i;
//alignment on the tree
if (actual.parent != null) {
AlignColumn parent = actual.parent.first;
column = actual.first;
String temp1 = alignment[actual.parent.leafCount].substring(31);
// System.out.println(temp1+"\n"+temp);
for (int j = 0; j < temp.length(); j++) {
if (temp.charAt(j) != '-' && temp1.charAt(j) != '-') {
column.parent = parent;
column.orphan = false;
if (actual.parent.left == actual) {
parent.left = column;
} else {
parent.right = column;
}
column = column.next;
parent = parent.next;
} else if (temp.charAt(j) == '-' && temp1.charAt(j) != '-') {
parent = parent.next;
} else if (temp.charAt(j) != '-' && temp1.charAt(j) == '-') {
column.parent = parent;
column.orphan = true;
column = column.next;
}
}
column.parent = parent;
column.orphan = false;
if (actual.parent.left == actual) {
parent.left = column;
} else {
parent.right = column;
}
} else {//we are at root, set all aligncolumns to orphan
column = actual.first;
while (column != null) {
column.orphan = true;
column = column.next;
}
}
actual.selected = false;
if (actual.left == null && actual.right == null) {
actual = actual.parent;
while (actual.selected && actual.parent != null) {
actual = actual.parent;
}
actual.selected = true;
actual = actual.right;
} else {
actual = actual.left;
}
}
//double check:
//String[] check = printedAlignment();
//for(int i = 0; i < check.length; i++){
// System.out.println(check[i]);
//}
root.calcFelsenRecursively();
root.calcIndelLogLikeRecursively();
//System.out.println("Log-likelihood: "+getLogLike());
names = new String[(vertex.length + 1) / 2];
int index = 0;
for (int i = 0; i < vertex.length; i++) {
if (vertex[i].left == null) {
names[index] = vertex[i].name;
index++;
}
}
} catch (IOException e) {
}
}
void markAllVerticesUnselected() {
for (Vertex v : vertex) {
v.selected = false;
}
}
void compareLike(Tree tree) {
System.out.println("this.logli=" + getLogLike());
System.out.println("tree.logli=" + tree.getLogLike());
System.out.println("this.indel=" + root.indelLogLike);
System.out.println("tree.indel=" + tree.root.indelLogLike);
System.out.println("this.orphan=" + root.orphanLogLike);
System.out.println("tree.orphan=" + tree.root.orphanLogLike);
root.compareLike(tree.root);
}
public void checkPointers() {
for (int i = 0; i < vertex.length; i++) {
if (vertex[i].left != null && vertex[i].right != null) {
//System.out.println("Checking vertex "+i);
vertex[i].checkPointers();
AlignColumn p;
// checking pointer integrity
for (AlignColumn c = vertex[i].left.first; c != null; c = c.next) {
p = vertex[i].first;
while (c.parent != p && p != null)
p = p.next;
if (p == null)
throw new Error(
"child does not have a parent!!!"
+ vertex[i] + " "
+ vertex[i].print(4));
}
for (AlignColumn c = vertex[i].right.first; c != null; c = c.next) {
p = vertex[i].first;
while (c.parent != p && p != null)
p = p.next;
if (p == null)
throw new Error(
"child does not have a parent!!!"
+ vertex[i] + " "
+ vertex[i].print(4));
}
}
}
}
/**
* This constructor constructs a tree using a newick representation of the tree. It <b>does not</b>
* construct its HMMs and substitution model!!!
*/
Tree(String descriptor) {
int numberofnodes = 1;
for (int i = 0; i < descriptor.length(); i++) {
if (descriptor.charAt(i) == ':') {
numberofnodes++;
}
}
vertex = new Vertex[numberofnodes];
root = new Vertex();
root.old = new Vertex();
String leftDescriptor = "";
int counter = 0;
int j;
for (j = 1; counter != 0 || descriptor.charAt(j) != ','; j++) {
leftDescriptor += descriptor.charAt(j);
if (descriptor.charAt(j) == '(') {
counter++;
}
if (descriptor.charAt(j) == ')') {
counter--;
}
}
//System.out.println(leftDescriptor);
Vertex leftChild = new Vertex(leftDescriptor, root);
root.left = leftChild;
String rightDescriptor = "";
for (j += 1; counter != -1; j++) {
rightDescriptor += descriptor.charAt(j);
if (descriptor.charAt(j) == '(') {
counter++;
}
if (descriptor.charAt(j) == ')') {
counter--;
}
}
rightDescriptor = rightDescriptor.substring(0, rightDescriptor.length() - 1);
//System.out.println(rightDescriptor);
Vertex rightChild = new Vertex(rightDescriptor, root);
root.right = rightChild;
//System.out.println(printedTree());
//Mapping vertices
int actualNumber = numberofnodes - 1;
Vertex actualVertex = root;
root.selected = false;
vertex[numberofnodes - 1] = root;
while (actualNumber != 0) {
// System.out.println(" we are in the root of: "+actualVertex.print()+" it is selected: "+actualVertex.selected+
// " actualNumber: "+actualNumber);
if (actualVertex.left == null && actualVertex.right == null) {
actualVertex.selected = true;
while (actualVertex.selected) {
actualVertex = actualVertex.parent;
}
actualVertex.selected = true;
} else {
actualVertex = (actualVertex.selected ? actualVertex.right : actualVertex.left);
actualNumber--;
vertex[actualNumber] = actualVertex;
actualVertex.selected = false;
}
}
names = new String[(vertex.length + 1) / 2];
int index = 0;
for (int i = 0; i < vertex.length; i++) {
if (vertex[i].left == null) {
names[index] = vertex[i].name;
index++;
}
}
}
public double getLogLike() {
if (Utils.DOWNWEIGHT_INDEL_LIKELIHOOD) {
return (1.0/(((vertex.length+1)/2)-3))*root.indelLogLike + root.orphanLogLike;
}
else return root.indelLogLike + root.orphanLogLike;
}
/**
* Recomputes the likelihood from scratch and checks whether everything was correct before.
* Throws error if it finds inconsistency.
*/
public void recomputeCheckLogLike() {
root.recomputeCheckLogLike();
}
public int countLeaves() {
return root.countLeaves();
}
public void countSilentIndels() {
for (Vertex v : vertex) v.countSilentIndels();
}
void checkUppFelsProducts() {
double[] result = new double[vertex.length];
System.out.println("upp ยท fels = ");
VERTEX: for (Vertex v : vertex) {
boolean isRoot = (v == root);
result[v.index] = 0.0;
AlignColumn p = null, pp = null;
if (!isRoot) p = v.parent.first;
AlignColumn c = v.first;
double tmp = 0;
COLUMN: while (c != v.last) {
tmp = Math.log(Utils.calcEmProb(c.seq,c.upp));
if (v.left!=null && v.right!=null) {
//System.out.println(c.seq[0]+" "+c.left.seq[0]+" "+v.left.charTransMatrix[0][0]);
boolean match = Utils.calcFelsenWithCheck(c.seq,
c.left!=null?c.left.seq:null,v.left.charTransMatrix,
c.right!=null?c.right.seq:null,v.right.charTransMatrix);
if (!match) {
throw new RuntimeException("Didn't match.");
}
}
result[v.index] += tmp;
char par = '-', car = ' ';
if (c != v.last) car = c.mostLikely();
if (p != null && p != v.parent.last) par = p.mostLikely(); else par = '-';
System.out.print("\t"+tmp+" ("+par+" -> "+car+") ");
for (int i=0; i<c.seq.length; i++) {
System.out.print(c.seq[i]+"/"+c.upp[i]+" ");
}
System.out.println();
if (isRoot) { c = c.next; continue COLUMN; }
p = c.parent;
if (p == null) throw new RuntimeException ("Start p is null.");
c = c.next;
while (c.orphan) {
if (c != v.last) car = c.mostLikely(); else car = ' ';
tmp = Math.log(Utils.calcEmProb(c.seq,c.upp));
result[v.index] += tmp;
System.out.print("\t"+tmp+" ("+"- -> "+car+") ");
for (int i=0; i<c.seq.length; i++) {
System.out.print(c.seq[i]+"/"+c.upp[i]+" ");
}
System.out.println();
c = c.next;
}
pp = c.parent;
p = p.next;
//if (p != null && p != v.parent.last) System.out.print(" ("+p.mostLikely()+","+pp.mostLikely()+") ");
// Include contributions from all parent columns that
// experience a deletion on this branch.
while (p != pp) {
if (p != null && p != v.parent.last) par = p.mostLikely();
tmp = Math.log(Utils.calcEmProb(p.seq, p.upp));
result[v.index] += tmp;
System.out.print("\t"+tmp+" ("+par+"-> -) ");
for (int i=0; i<p.seq.length; i++) {
System.out.print(p.seq[i]+"/"+p.upp[i]+" ");
}
System.out.println();
p = p.next;
if (p == v.parent.last) break COLUMN;
}
}
System.out.println("\t"+result[v.index]+" \n");
}
System.out.println();
}
public int getTopVertexId(int ind) {
if (root == vertex[ind]) {
return 0;
}
if (root.left == vertex[ind]) {
return 1;
}
if (root.right == vertex[ind]) {
return 2;
}
return -1;
}
/**
* Returns a {@link State} object representing the current state. Assumes that
* leaves come first in the {@link #vertex} array.
*/
//public State getState() {
// return getState(root);
//}
//public State getState(Vertex subtreeRoot) {
public State getState() {
int nn = vertex.length;
int nl = (nn+1)/2;
State state = new State(nn);
int i;
HashMap<Vertex, Integer> lookup = new HashMap<Vertex, Integer>();
for(i = 0; i < nn; i++)
lookup.put(vertex[i], i);
Vertex v;
state.root = lookup.get(root);
for(i = 0; i < nn; i++) {
v = vertex[i];
state.left[i] = v.left != null ? lookup.get(v.left) : -1;
state.right[i] = v.right != null ? lookup.get(v.right) : -1;
state.parent[i] = v.parent != null ? lookup.get(v.parent) : -1;
state.edgeLen[i] = v.edgeLength;
state.align[i] = v.getAlign();
state.felsen[i] = v.getFelsen();
state.seq[i] = v.sequence();
}
for(i = 0; i < nl; i++)
state.name[i] = vertex[i].name;
state.indelParams = hmm2.params.clone();
state.substParams = substitutionModel.params.clone();
state.logLike = getLogLike();
state.isBurnin = this.owner.burnin;
return state;
}
/**
* Generates a String contaning the description of the tree in Newick format.
* @return the string contaning the description of the tree in Newick format.
*/
public String printedTree() {
StringBuffer b = new StringBuffer();
root.print(b);
return b.toString();
}
public String printedTreeWithNumbers() {
StringBuffer b = new StringBuffer();
root.printWithNumbers(b);
return b.toString();
}
public double getLogPrior() {
return 0;
}
/*
* Returns with a String array containing the alignment of sequences on the tree.
* @param type This can be
* <ul>
* <li><tt>StatAlign</tt> Our own format. Contains the ancestral sequences, too
* <li><tt>Clustal</tt> Standard Clustal alignment format.
* <li><tt>Fasta</tt> Standard fasta format, alignments are broken into 60 character long lines.
* <li><tt>Phylip</tt> Standard Phylip format
* <li><tt>Nexus</tt> Nexus format. MrBayes can read it, but we did not test it for other programs.
* </ul>
* @return The string array containing the multiple alignment in the prescribed format.
*/
// public String[] printedAlignment(String type) {
// String[] s = printedAlignment();
// return Utils.alignmentTransformation(s, type, this);
//
// }
private String[] printedAlignment() {
String[] s = root.printedMultipleAlignment();
/*
int p = 29;
while(allSpace(s,p)){
for(int i = 0; i < s.length; i++){
String temp = s[i].substring(0, p) + s[i].substring(p+1,s[i].length());
s[i] = temp;
}
p--;
}
p++;
for(int i = 0; i < s.length; i++){
s[i] = s[i].substring(0,p)+" "+s[i].substring(p, s[i].length());
}
*/
return s;
}
/**
* It generates the array of row sequences.
* The format is sequence name TAB row sequence.
* @return The string array containing the sequence names and sequences.
*/
public String rowSequences() {
String s = "";
for (int i = 0; i < vertex.length; i++) {
if (vertex[i].left == null) {
s += vertex[i].name + "\t" + vertex[i].sequence() + "\n";
}
}
return s;
}
@Override
public boolean perSequenceData() {
return false;
}
@Override
public String getSummaryAssociatedWith(String sequenceName) {
return null;
}
@Override
public void removeDataAssociatedWith(String sequenceName) {
}
// /** This function is only for testing purposes. */
// public static void main(String[] args) {
// /*
// Tree tree = new Tree(new String[] {"kkkkkk", "kkkkkkkk", "kkkkkkkk", "kkkkkkkk"},
// new String[] {"A","B","C", "D"},
// new NonParametric(),
// new Blosum62());
// System.out.println(tree.printedTree());
// tree.root.calcFelsRecursively();
// System.out.println(tree.root.orphanLogLike);
// */
// try {
// Tree tree = new Tree(new File("newick"), new Blosum62());
// System.out.println(tree.printedTree());
// } catch (FileNotFoundException e) {
// } catch (IOException e) {
// }
// }
/**
* It roots the tree on a taxon
* @param taxonName Taxon on which to root tree
*/
public void makeRoot(String taxonName){
if(vertex.length>0){
Vertex taxon = vertex[0];
for(Vertex currentNode : vertex){
if(currentNode.name == taxonName){
taxon = currentNode;
}
}
ArrayList<Vertex> pathToRoot = new ArrayList<Vertex>();
//Store left or right value from above node...
ArrayList<Boolean> isLeft = new ArrayList<Boolean>();
Vertex nodeToAdd = taxon;
// create path from taxon to root - first in path is always taxon...last will always be root...
pathToRoot.add(taxon);
do{
isLeft.add((nodeToAdd.parent.left==nodeToAdd)?true:false);
nodeToAdd = nodeToAdd.parent;
pathToRoot.add(nodeToAdd);
}while(nodeToAdd != root);
// Now we have a list of vertices from the taxon to the root and an associate list of the side on which the taxon is of its parent....except for root of course...!
// make the non root side have parent of last node in list...
// set root as above second vertex
//Delete the root
}
}
}