package com.ppfold.main;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Stack;

import com.ppfold.algo.Node;
import com.ppfold.algo.Tree;

/**
 * Reads a tree written in Newick-style notation (nested, comma-separated
 * {@code name:distance} entries grouped by parentheses) and builds a
 * {@link Tree} of {@link Node} objects from it.
 *
 * <p>The parser keeps its node-ID counter in a static field, so concurrent
 * calls to {@link #parse(String)} would interfere with each other's IDs.
 */
public class NewickReader {

    /**
     * Next ID handed out to a created node. The ID is an arbitrary number for
     * each node, mostly for debugging purposes. Reset by {@link #parse(String)}.
     */
    private static int nodeCounter = 0;

    /**
     * Reads a tree description from a text file and parses it.
     *
     * <p>Every line of the file is trimmed and the lines are joined (without
     * separator) into one string, which is then handed to {@link #parse(String)}.
     *
     * @param fullFileName path of the file to read
     * @return the parsed tree
     * @throws Exception if the file cannot be read, or if its content cannot be
     *                   parsed; the underlying exception is attached as the cause
     */
    public static Tree readNewick(String fullFileName) throws Exception {
        StringBuilder treeString = new StringBuilder();
        try {
            BufferedReader reader = new BufferedReader(new FileReader(fullFileName));
            try {
                String line;
                // readLine() returns null at end of file, so test before trimming.
                while ((line = reader.readLine()) != null) {
                    treeString.append(line.trim());
                }
            } finally {
                // Release the file handle even when reading fails half-way.
                reader.close();
            }
        } catch (IOException e) {
            throw new Exception("Error while trying to read tree file! Check that the name is OK.", e);
        }

        try {
            return NewickReader.parse(treeString.toString());
        } catch (Exception e) {
            throw new Exception("Error while trying to parse the tree file!", e);
        }
    }

    /**
     * Parses a tree description held in a string.
     *
     * <p>A trailing {@code ';'} is removed, as is one pair of parentheses when
     * the string both starts with {@code '('} and ends with {@code ')'}. A
     * synthetic node named "root" (ID 0, distance 0) is created as the parent
     * of everything in the string. If that root ends up with exactly one child
     * whose distance from its parent is 0, the child is used as the root
     * instead.
     *
     * @param inputString the tree description
     * @return the parsed tree, or {@code null} if processing reported a
     *         non-zero status
     * @throws Exception if the string is empty or malformed; the underlying
     *                   exception is attached as the cause
     */
    public static Tree parse(String inputString) throws Exception {
        try {
            if (inputString == null || inputString.length() == 0) {
                throw new Exception("The tree string is empty.");
            }

            // remove ; from end
            if (inputString.endsWith(";")) {
                inputString = inputString.substring(0, inputString.length() - 1);
            }
            if (inputString.length() == 0) {
                throw new Exception("The tree string contains nothing but ';'.");
            }

            // remove any enclosing () pairs
            if (inputString.charAt(0) == '(' && inputString.charAt(inputString.length() - 1) == ')') {
                inputString = inputString.substring(1, inputString.lastIndexOf(')'));
            }

            // Create a root node
            Node root = new Node(0);
            root.setName("root");
            root.setId(0);
            root.setDistanceFromParent(0);
            nodeCounter = 1;

            // Process full string that corresponds to node=root.
            int success = processString(inputString, root);
            if (success != 0) {
                return null;
            }

            // Identify if there is an extra root node: a single child at
            // distance 0 means the input supplied its own root, so drop ours.
            if (root.getChildren().size() == 1
                    && root.getChildren().get(0).getDistanceFromParent() == 0d) {
                root = root.getChildren().get(0);
            }
            return new Tree(root);
        } catch (Exception e) {
            throw new Exception("An error occured while trying to parse the tree. Check format.", e);
        }
    }

    /**
     * Adds the nodes described by {@code inputString} as children of
     * {@code parent}, recursing into parenthesised groups.
     *
     * <p>There are two cases. If the string contains no parentheses it is a
     * plain comma-separated list of {@code name:distance} leaves. Otherwise one
     * parenthesised group (see {@link #findNodeSubstring(String)}) is cut out
     * and turned into an internal node, its content is processed recursively
     * with that node as parent, and whatever is left of the string is
     * processed again for the same parent.
     *
     * @param inputString the part of the tree description belonging to {@code parent}
     * @param parent      the node receiving the children
     * @return 0 on success, -1 if no parenthesised group could be located
     * @throws Exception if the parentheses are unbalanced or a leaf has no distance
     */
    private static int processString(String inputString, Node parent) throws Exception {
        if (inputString.indexOf('(') == -1) {
            // No brackets at all: only leaves at this level.
            String[] childrenString = inputString.split(",");
            for (String childString : childrenString) {
                // For each child, extract name and distance, and add it to the parent.
                String[] childSplitString = childString.split(":", 2);
                if (childSplitString.length < 2) {
                    throw new Exception("Missing ':distance' for node '" + childString + "'.");
                }
                Node thisChild = new Node(Double.parseDouble(childSplitString[1])); // distance from parent
                thisChild.setName(childSplitString[0]);
                thisChild.setId(nodeCounter++);
                parent.addChild(thisChild);
            }
            return 0;
        }

        // There are brackets. Identify a bracket pair, because it represents an
        // internal node ("this node" below), create the node for this pair,
        // then deal with the rest of the string.
        int[] positionOfNodeSubstring = findNodeSubstring(inputString);
        if (positionOfNodeSubstring == null) {
            return -1;
        }
        int open = positionOfNodeSubstring[0];
        int close = positionOfNodeSubstring[1];

        String nodeSubstring = inputString.substring(open + 1, close);   // the subtree coming from this node
        String leftParentString = inputString.substring(0, open);        // everything left of this node
        String rightParentStringFull = inputString.substring(close + 1); // info for this node + later siblings

        // Isolate the info about this node from the other nodes in the parent.
        String[] splitRightParentString = rightParentStringFull.split(",", 2);
        String nameAndNumberString = splitRightParentString[0];
        String rightParentString = splitRightParentString.length == 2 ? splitRightParentString[1] : "";
        String remainderParentString = leftParentString.concat(rightParentString); // the rest of the parent

        // Extract and set the info for this node.
        String[] nameNumberSplitString = nameAndNumberString.split(":", 2);
        Node newNode = new Node();
        if (nameNumberSplitString.length == 1) {
            // No ':' present: the text is taken to be the distance.
            newNode.setDistanceFromParent(Double.parseDouble(nameNumberSplitString[0]));
        } else {
            // Both name (possibly empty) and distance are given.
            newNode.setDistanceFromParent(Double.parseDouble(nameNumberSplitString[1]));
            newNode.setName(nameNumberSplitString[0]);
        }
        newNode.setId(nodeCounter++);
        parent.addChild(newNode);

        // Deal with the subtree of this node.
        int status = processString(nodeSubstring, newNode);
        if (status != 0) {
            return status;
        }

        // Process the rest of the parent, unless we have come to the end.
        if (remainderParentString.length() != 0) {
            return processString(remainderParentString, parent);
        }
        return 0; // success
    }

    /**
     * Locates a parenthesised group in {@code inputString} and checks that all
     * parentheses are balanced.
     *
     * <p>The string is scanned left to right and the recorded pair is
     * overwritten every time a {@code ')'} is matched, so the pair returned is
     * the one closed by the last {@code ')'} in the string.
     *
     * @param inputString the string to scan
     * @return a two-element array holding the index of the opening parenthesis
     *         and the index of its closing parenthesis, or {@code null} if the
     *         string contains no {@code '('}
     * @throws Exception if a parenthesis has no partner; the message gives its
     *                   1-based position
     */
    public static int[] findNodeSubstring(String inputString) throws Exception {
        if (inputString.indexOf('(') == -1) {
            return null;
        }

        int[] pair = new int[2];
        Stack<Integer> openPositions = new Stack<Integer>();
        for (int i = 0; i < inputString.length(); i++) {
            char c = inputString.charAt(i);
            if (c == '(') {
                openPositions.push(i);
            } else if (c == ')') {
                if (openPositions.isEmpty()) {
                    throw new Exception("Unmatched right paranthesis at position " + (i + 1) + " in input tree!");
                }
                pair[0] = openPositions.pop();
                pair[1] = i;
            }
        }
        if (!openPositions.isEmpty()) {
            throw new Exception("Unmatched left paranthesis at position " + (openPositions.pop() + 1) + " in input tree!");
        }
        return pair;
    }
}