package statalign.io.input.plugins;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.List;
import statalign.base.CircularArray;
import statalign.base.Tree;
import statalign.base.Vertex;
import statalign.io.input.DataReader;
/**
* Class to parse Newick format tree files or strings.
*
* The tree must be either a rooted or an unrooted binary tree; the latter will be rooted after
* parsing at the middle of the third branch of the top node, as the Tree class does not support
* unrooted trees.
*
* The vertex[] array of the tree is filled such that the leaf nodes come first, in the order
* they are visited by a left-first preorder traversal, followed by the internal nodes in the
* reverse of the order they are visited by the same traversal (the root will be the last). The
* latter ones get labelled by their index in the array. Vertex.vertexNum fields are filled.
*
* You can specify a lower limit on edge lengths using the appropriate constructors. Default
* is a lower limit of 0.
*
* Any amount of white space (including new lines) may be present.
*
* @author novak
*
*/
public class NewickReader extends DataReader {

    /** Internal buffer holding the String representation of the complete tree. */
    private String buf;

    /** Current parsing position within {@link #buf}. */
    private int pos;

    /** Leaf vertices, in the order they are encountered while parsing. */
    CircularArray<Vertex> leaves;

    /**
     * Internal vertices collected while parsing. Entries whose {@code left} child
     * has been set to {@code null} are considered deleted and are skipped by
     * {@link #read(Reader)}.
     */
    CircularArray<Vertex> inNodes;

    /** Lower limit applied to every parsed edge length. */
    private double minEdgeLen = 0;

    /**
     * Constructs a NewickReader with the default minimum edge length of 0.
     */
    public NewickReader() {
    }

    /**
     * Constructs a NewickReader with a specified minimum edge length.
     *
     * @param minEdgeLen lower limit applied to edge lengths read from the input
     */
    public NewickReader(double minEdgeLen) {
        this.minEdgeLen = minEdgeLen;
    }

    /**
     * Parses a tree in Newick format from the given reader.
     *
     * The reader is consumed completely and closed. The same NewickReader
     * instance may be used for several consecutive calls; each call starts
     * parsing at the beginning of the newly read input.
     *
     * @param reader source of the Newick representation
     * @return tree built
     * @throws FormatException if the input is not a well-formed Newick tree
     * @throws IOException if an I/O error occurs while reading
     */
    @Override
    public Tree read(Reader reader) throws IOException {
        readBuf(reader);
        // Always start at the beginning of the freshly read buffer; without this a
        // reused reader would continue from where the previous parse stopped.
        pos = 0;

        leaves = new CircularArray<Vertex>();
        inNodes = new CircularArray<Vertex>();

        Tree tree = new Tree();
        Vertex root = new Vertex(tree);
        inNodes.push(root);

        expectNext("(");
        parseSubtree(root, 0, true);
        if (expectNext(",)") == 1) { // handle unrooted tree
            // Only one child inside the parentheses: the node following ')' is
            // parsed (without edge length) as the right child, and the single
            // edge length is split between the two edges below the root.
            parseSubtree(root, 1, false);
            root.left.edgeLength = Math.max(minEdgeLen, root.left.edgeLength / 2);
            root.right.edgeLength = root.left.edgeLength;
        } else {
            parseSubtree(root, 1, true);
            if (expectNext(",)") == 0) { // handle unrooted tree and transform to rooted one
                root.parent = new Vertex(tree);
                root.parent.left = root;
                root = root.parent;
                inNodes.unshift(root); // put new root to the beginning of inNodes
                parseSubtree(root, 1, true);
                // Root the tree in the middle of the third branch, respecting the
                // minimum edge length on both halves.
                if (root.right.edgeLength < 2 * minEdgeLen) {
                    root.left.edgeLength = root.right.edgeLength = minEdgeLen;
                } else {
                    root.right.edgeLength /= 2;
                    root.left.edgeLength = root.right.edgeLength;
                }
                expectNext(")");
            }
        }
        expectNext(";");

        // Number internal nodes consecutively, starting from the number of leaves,
        // so that each vertex's index equals its position in tree.vertex.
        int id = leaves.length();
        Vertex vertex;
        while ((vertex = inNodes.pop()) != null) {
            if (vertex.left == null) { // has been deleted
                continue;
            }
            vertex.index = id++;
            leaves.push(vertex);
        }

        tree.vertex = leaves.toArray(new Vertex[leaves.length()]);
        tree.root = root;
        return tree;
    }

    /**
     * Recursively parses a subtree from buf starting at pos, optionally followed by
     * the length of the edge leading to its parent.
     * Moves pos to the first character after the subtree's representation.
     * The subtree can also be a single named node. The root of the built subtree
     * will be set as the child of parent.
     *
     * An internal node that has only one child is removed: its child is attached
     * directly to parent and the two edge lengths are added together.
     *
     * @param parent parent of the subtree
     * @param child 0 if subtree should be left child, 1 if right
     * @param withLen true if every node of the subtree is followed by
     *        {@code :<edge length>} in the input
     * @throws FormatException if the input is malformed at the current position
     */
    protected void parseSubtree(Vertex parent, int child, boolean withLen) throws FormatException {
        Vertex v = new Vertex(parent);
        if (child == 0) {
            parent.left = v;
        } else {
            parent.right = v;
        }

        char x = next();
        if (x != '(') { // leaf node
            v.index = leaves.length();
            leaves.push(v);
            // get name
            int found = expectLater("():,;");
            v.name = buf.substring(pos, found).trim();
            pos = found; // skip name
        } else { // internal node
            pos++; // skip '('
            inNodes.push(v);
            parseSubtree(v, 0, withLen);
            if (expectNext(",)") == 1) { // no right subtree: delete node
                Vertex w = v.left;
                w.parent = parent;
                if (child == 0) {
                    parent.left = w;
                } else {
                    parent.right = w;
                }
                v.left = null; // marks v as deleted in inNodes
                v = w; // add edge length to existing node
            } else {
                parseSubtree(v, 1, withLen);
                expectNext(")");
            }
        }

        if (withLen) {
            // get edge length
            expectNext(":");
            int found = expectLater("():,");
            try {
                v.edgeLength = Math.max(
                        v.edgeLength + Double.parseDouble(buf.substring(pos, found)), minEdgeLen);
            } catch (NumberFormatException e) {
                FormatException fe = new FormatException(FormatExceptType.EDGELEN_ERROR);
                fe.initCause(e); // keep the original parse failure for diagnostics
                throw fe;
            }
            pos = found;
        }
    }

    /**
     * Reads the whole input and stores it in buf. Line terminators are dropped.
     * The reader is closed afterwards, even if reading fails.
     *
     * @param reader source to read from
     * @throws IOException if an I/O error occurs
     */
    protected void readBuf(Reader reader) throws IOException {
        BufferedReader br = new BufferedReader(reader);
        try {
            StringBuilder build = new StringBuilder();
            String str;
            while ((str = br.readLine()) != null) {
                build.append(str);
            }
            buf = build.toString();
        } finally {
            br.close();
        }
    }

    /**
     * Checks that the next non-whitespace character at or after pos is one of chars
     * and moves pos just past it.
     *
     * @param chars the set of acceptable characters
     * @return the index within chars of the character that was found
     * @throws FormatException if the next non-whitespace character isn't one of chars
     *         or the end of input is reached
     */
    protected int expectNext(String chars) throws FormatException {
        char x = next();
        int found = chars.indexOf(x);
        if (found == -1) {
            throw new FormatException(FormatExceptType.PARSE_ERROR, chars);
        }
        pos++;
        return found;
    }

    /**
     * Returns the first index (starting from pos) where any of the characters in chars
     * is found; pos is not moved.
     *
     * @param chars the set of terminating characters to look for
     * @return index in buf of the first occurrence of one of chars, always greater than pos
     * @throws FormatException if one of chars is found directly at pos (empty field) or
     *         none of them occurs before the end of input
     */
    protected int expectLater(String chars) throws FormatException {
        for (int i = pos; i < buf.length(); i++) {
            if (chars.indexOf(buf.charAt(i)) != -1) {
                if (i == pos) {
                    throw new FormatException(FormatExceptType.EMPTY_FIELD);
                }
                return i;
            }
        }
        throw new FormatException(FormatExceptType.PREMATURE_END, chars);
    }

    /**
     * Returns the next non-whitespace character and moves pos to its position
     * (the character itself is not consumed).
     *
     * @throws FormatException if the end of input is reached first
     */
    protected char next() throws FormatException {
        for (;;) {
            if (pos >= buf.length()) {
                throw new FormatException(FormatExceptType.PREMATURE_END);
            }
            char x = buf.charAt(pos);
            if (!Character.isWhitespace(x)) {
                return x;
            }
            pos++;
        }
    }

    @Override
    public List<String> supportedExtensions() {
        return Arrays.asList("tree", "ph", "newick");
    }

    /**
     * Signals malformed Newick input. The parse position and the remaining input
     * are captured when the exception is created, so the message stays valid even
     * if the reader is used again afterwards.
     */
    public class FormatException extends IOException {
        private static final long serialVersionUID = 1L;

        FormatExceptType type;
        String expected;

        /** Zero-based parse position at the time the error was detected. */
        private final int errorPos;
        /** Unparsed remainder of the input at the time the error was detected. */
        private final String context;

        public FormatException(FormatExceptType type) {
            this(type, null);
        }

        public FormatException(FormatExceptType type,
                String expected) {
            this.type = type;
            this.expected = expected;
            this.errorPos = pos;
            // Guard against a missing buffer or an out-of-range position so that
            // building the message can never fail itself.
            this.context = (buf == null || pos > buf.length()) ? "" : buf.substring(pos);
        }

        @Override
        public String getMessage() {
            return type.getMessage()+(expected != null ? " - expected "+
                    (expected.length()>1?" one of `":" `")+expected+"'":"")+
                    " at "+(errorPos+1)+" near `"+context+"'";
        }
    }

    /** Kinds of format errors reported by {@link FormatException}. */
    public static enum FormatExceptType {
        PARSE_ERROR("NewickReader: Parse error"),
        PREMATURE_END("NewickReader: Premature end of input"),
        EDGELEN_ERROR("NewickReader: Ill-formatted edge length"),
        EMPTY_FIELD("NewickReader: Edge length or node name field has zero length");

        private final String message;

        private FormatExceptType(String message) {
            this.message = message;
        }

        public String getMessage() {
            return message;
        }
    }

    /** Small manual check: parses a fixed unrooted tree and prints the result. */
    public static void main(String[] args) throws IOException, FormatException {
        Tree tree = new NewickReader().read(new StringReader(" (x:1e1,(t:3,u:3):3,(x:9,y:9):1);"));
        System.out.println(tree.printedTree());
    }
}