package statalign.base; import java.util.Arrays; import statalign.postprocess.utils.NewickParser; /** * Objects of this class completely describe the state of the MCMC at a certain time. * The state space that the chain explores comprises: * <ul> * <li>a phylogenetic tree (topology + edge lengths) * <li>alignments between neighbouring nodes in the tree (implicitly lengths of * the ancestral sequences) * <li>parameters of the insertion-deletion model (TKF92) and the user-selected * substitution model * </ul> * * <p>Apart from the above listed components State objects also store the * Felsenstein likelihoods calculated for each character of each sequence and the * full likelihood of the state to facilitate postprocessing based on these values * (e.g. ancestral sequence estimation, loglikelihood trace etc.). * * <p>Tree node representation assumption: first {@link #nl} nodes are the leaves, * then come the ancestral nodes. * * @author novak */ public class State { /** Number of nodes */ public int nn; /** Number of leaves (input sequences) */ public int nl; /** Root node of the tree */ public int root; /** Tree representation: left descendant of each node (first nl are leaves), -1 for none */ public int[] left; /** Tree representation: right descendant of each node (first nl are leaves), -1 for none */ public int[] right; /** Tree representation: parent of each node (first nl are leaves), -1 for none */ public int[] parent; /** Edge length to parent, for each node (first nl are leaves) */ public double[] edgeLen; /** * Alignment: for each sequence and each character in the sequence the index of the * ancestral character that it is aligned to, or -i-1 if it is not homologous to * any character in the ancestor (gap) and the next character in the ancestor sequence * (to the right) has index i */ public int[][] align; /** Felsenstein likelihoods for each sequence and each character therein */ public double[][][] felsen; /** All sequences as strings, including most likely ancestral characters */ public String[] seq; /** Names of the leaf sequences */ public String[] name; /** Indel model parameters (in the order: R, lambda, mu) */ public double[] indelParams; /** Substitution model parameters, as given by the substitution plugin */ public double[] substParams; /** Log-likelihood of the state, excluding model extension factors */ public double logLike; /** Cache to store previously calculated leaf alignment by {@link #getLeafAlign()} */ private String[] leafAlign; /** Cache to store previously calculated full alignment by {@link #getFullAlign()} */ private String[] fullAlign; /** Cache to store previously calculated newick tree string by {@link #getNewickString()} */ private String newickString; /** <code>true</code> if this state comes from the burnin.*/ public boolean isBurnin; /** * Constructs a new {@link State} object, filling the given parameter and * pre-allocating arrays (first dimensions only). Does not create parameter arrays. * * @param nn number of nodes total (including leaves) */ public State(int nn) { this.nn = nn; nl = (nn+1)/2; left = new int[nn]; right = new int[nn]; parent = new int[nn]; edgeLen = new double[nn]; align = new int[nn][]; felsen = new double[nn][][]; seq = new String[nn]; name = new String[nl]; } /** * Returns string representation of alignment between <i>node</i> and its parent. Uses * characters in {@link #seq} to construct the strings, - is printed for gaps. * * Don't call for root, it will produce array bounds errors. * * @param node the node to get the alignment for * @return String array of two elements, parent comes first */ public String[] getPairwiseAlign(int node) { StringBuilder par = new StringBuilder(); // parent StringBuilder des = new StringBuilder(); // descendant String s = seq[node], ps = seq[parent[node]]; int[] al = align[node]; int len = al.length, plen = align[parent[node]].length, i = 0, pi = 0, ali; while(i < len || pi < plen) { if(i == len || pi < normPos(ali=al[i])) { // deletion par.append(ps.charAt(pi)); des.append('-'); pi++; } else if(ali < 0) { // insertion par.append('-'); des.append(s.charAt(i)); i++; } else { // match par.append(ps.charAt(pi)); des.append(s.charAt(i)); pi++; i++; } } return new String[] { par.toString(), des.toString() }; } private int normPos(int pos) { return pos < 0 ? -pos-1 : pos; } /** * Returns the multiple alignment of all leaf sequences. */ public String[] getLeafAlign() { if(leafAlign == null) { Aligner aligner = new Aligner(true); leafAlign = aligner.createAlign(); if(Utils.DEBUG) checkConsistency(); } return leafAlign; } /** * Returns the multiple alignment of all sequences, including ancestors. */ public String[] getFullAlign() { if(fullAlign == null) { Aligner aligner = new Aligner(false); fullAlign = aligner.createAlign(); if(Utils.DEBUG) checkConsistency(); } return fullAlign; } /** * Returns the Newick string representation of the tree */ public String getNewickString() { if(newickString == null) { StringBuilder sb = new StringBuilder(); newick(root, sb); newickString = sb.toString(); } return newickString; } public String getNewickStringWithLabels() { if(newickString == null) { StringBuilder sb = new StringBuilder(); newickWithNumbers(root,sb); newickString = sb.toString(); } return newickString; } public void updateNewickStringWithLabels() { StringBuilder sb = new StringBuilder(); newickWithNumbers(root,sb); newickString = sb.toString(); } /** Recursively prints newick representation of subtree into sb */ private void newick(int node, StringBuilder sb) { newick(node,sb,false); } private void newickWithNumbers(int node, StringBuilder sb) { newick(node,sb,true); } private void newick(int node, StringBuilder sb, boolean withNumbers) { if(node < nl) { // leaf String nameEncoded = NewickParser.getEncodedTaxaName(name[node]); sb.append(nameEncoded); } else { sb.append('('); newick(left[node],sb,withNumbers); sb.append(','); newick(right[node],sb,withNumbers); sb.append(')'); if (withNumbers) sb.append("["+node+"]"); //sb.append("["+node+"]"); } if(node == root) { sb.append(';'); } else { sb.append(':'); sb.append(edgeLen[node]); } } private class Aligner { boolean fullAlign; char[] column, allGap; /** current position in each alignment (indexed by node) */ int[] pos; StringBuilder[] rows; public Aligner(boolean leafOnly) { fullAlign = !leafOnly; int len = leafOnly ? nl : nn; allGap = new char[len]; Arrays.fill(allGap, '-'); rows = new StringBuilder[len]; for(int i = 0; i < len; i++) rows[i] = new StringBuilder(); pos = new int[nn]; } String[] createAlign() { int l = left[root], r = right[root], len = align[root].length, i; for(i = 0; i < len; i++) { before(l, i); before(r, i); if(fullAlign) { newCol(); column[root] = seq[root].charAt(i); } at(l, i); at(r, i); outCol(); } before(l, i); before(r, i); String[] out = new String[rows.length]; for(i = 0; i < out.length; i++) out[i] = rows[i].toString(); return out; } private void newCol() { column = Utils.copyOf(allGap); } private void outCol() { if(column == null) return; for(int i = 0; i < column.length; i++) { rows[i].append(column[i]); } column = null; } private void before(int node, int pi) { int al[] = align[node]; int i = pos[node], len = al.length, ali = 0, l = left[node], r = right[node]; boolean leaf = node < nl; while(i < len && (ali=al[i]) < 0 && -ali-1 == pi) { if(!leaf) { before(l, i); before(r, i); } if(leaf || fullAlign) { newCol(); column[node] = seq[node].charAt(i); } if(!leaf) { at(l, i); at(r, i); } outCol(); i++; } if(!leaf && ( (i < len && ali >= 0 && ali == pi) || (i == len && pi == align[parent[node]].length))) { before(l, i); // in descendants output insertions (only once!) before(r, i); // before next match and after last column } pos[node] = i; } private void at(int node, int pi) { int al[] = align[node]; int i = pos[node], len = al.length, ali = 0; boolean leaf = node < nl; if(i < len && (ali=al[i]) >= 0 && ali == pi) { if(leaf || fullAlign) { if(column == null) newCol(); column[node] = seq[node].charAt(i); } if(!leaf) { at(left[node], i); at(right[node], i); } i++; } pos[node] = i; } } public static void main(String[] args) { State s = new State(5); s.test(); } private void test() { Arrays.fill(parent, -1); Arrays.fill(left, -1); Arrays.fill(right, -1); parent[0] = 3; parent[1] = 3; parent[2] = 4; parent[3] = 4; left[3] = 0; right[3] = 1; left[4] = 3; right[4] = 2; align[0] = new int[] { -2, -2, 1 }; align[1] = new int[] { 0, 1 }; align[2] = new int[] { -2, 2 }; align[3] = new int[] { -1, 2 }; align[4] = new int[] { 0, 0, 0 }; root = 4; seq[0] = "***"; seq[1] = "**"; seq[2] = "**"; seq[3] = "**"; seq[4] = "***"; for(String x : getFullAlign()) System.out.println(x); for(int i = 0; i < 4; i++) { System.out.println("///////////"); for(String x : getPairwiseAlign(i)) System.out.println(x); } } private void checkConsistency() { if(leafAlign == null) { Aligner aligner = new Aligner(true); leafAlign = aligner.createAlign(); } if(fullAlign == null) { Aligner aligner = new Aligner(false); fullAlign = aligner.createAlign(); } checkSeqs(); checkAligns(); } private void checkSeqs() { // pairwise for(int i = 0; i < nn; i++) { if(i != root) { String[] al = getPairwiseAlign(i); checkSeq(al[0], parent[i], "PairP"+parent[i]); checkSeq(al[1], i, "PairA"+i); } } // leaf align for(int i = 0; i < leafAlign.length; i++) checkSeq(leafAlign[i], i, "Leaf"+i); // full align for(int i = 0; i < fullAlign.length; i++) checkSeq(fullAlign[i], i, "Full"+i); } private void checkSeq(String line, int node, String at) { String s = seq[node]; int p = 0; char ch; for(int j = 0; j < line.length(); j++) { ch = line.charAt(j); if(ch == '-') continue; if(p == s.length() || ch != s.charAt(p)) throw new Error("Alignment inconsistency id1 at: "+at); p++; } if(p != s.length()) throw new Error("Alignment inconsistency id2 at: "+at); } private void checkAligns() { // full to pairwise for(int i = 0; i < nn; i++) { if(i != root) { String[] pair = getPairwiseAlign(i); String[] full = new String[] { fullAlign[parent[i]], fullAlign[i] }; checkAlign(full, pair, "FullPair"+i); } } // full to leaf checkAlign(Arrays.copyOf(fullAlign, leafAlign.length), leafAlign, "FullLeaf"); } private void checkAlign(String[] longer, String[] shorter, String at) { // remove gaps StringBuilder[] sb = new StringBuilder[longer.length]; int i, j; for(i = 0; i < longer.length; i++) sb[i] = new StringBuilder(); for(i = 0; i < longer[0].length(); i++) { for(j = 0; j < longer.length; j++) if(longer[j].charAt(i) != '-') break; if(j < longer.length) { for(j = 0; j < longer.length; j++) sb[j].append(longer[j].charAt(i)); } } String[] longNoGaps = new String[longer.length]; for(i = 0; i < longer.length; i++) longNoGaps[i] = sb[i].toString(); // check equality for(i = 0; i < longNoGaps.length; i++) if(!longNoGaps[i].equals(shorter[i])) throw new Error("Alignment inconsistency id3 at: "+at); } @SuppressWarnings("unused") private void print(String[] al) { for(String x : al) System.out.println(x); } }