/* * MakeAncestralSequenceAnnotatedTree.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.app.tools; import dr.app.beast.BeastVersion; import dr.app.util.Arguments; import dr.app.util.Utils; import dr.inference.trace.TraceException; import dr.util.Version; import java.io.*; import java.util.Hashtable; import dr.app.beast.BeastVersion; import dr.app.util.Arguments; import dr.app.util.Utils; import dr.inference.trace.TraceException; import dr.util.Version; import java.io.*; import java.util.*; /** * MakeAncestralSequenceAnnotatedTree - Converts BAli-Phy *.out, *.fastas, *.trees files into * NEXUS format readable by BEAST and AncestralSequenceAnnotator * Based on Marc A. Suchard's make-nexus-file script * Reimplemented by Wai Lok Sibon Li in BEAST for compatibility with BAli-Phy v2.1.0 and above * * * @author Wai Lok Sibon Li */ public class MakeAncestralSequenceAnnotatedTree { private final static Version version = new BeastVersion(); public MakeAncestralSequenceAnnotatedTree(int thin, int skip, String outInputFileName, String fastasInputFileName, String treesInputfileName, String outputFileName) throws IOException, TraceException { File outInputFile = new File(outInputFileName); if (outInputFile.isFile()) { System.out.println("Analysing *.out file: " + outInputFileName); } else { System.err.println(outInputFileName + " does not exist!"); System.exit(0); } File fastasInputFile = new File(fastasInputFileName); if (fastasInputFile.isFile()) { System.out.println("Analysing *.fastas file: " + fastasInputFileName); } else { System.err.println(fastasInputFileName + " does not exist!"); System.exit(0); } File treesInputfile = new File(treesInputfileName); if (treesInputfile.isFile()) { System.out.println("Analysing *.trees file: " + treesInputfileName); } else { System.err.println(treesInputfileName + " does not exist!"); System.exit(0); } if (outputFileName != null) { FileOutputStream outputStream = new FileOutputStream(outputFileName); System.setOut(new PrintStream(outputStream)); } analyze(outInputFile, fastasInputFile, treesInputfile, skip, thin); } /** * * Analyses the output files * */ private void analyze(File outInputFile, File fastasInputFile, File treesInputfileName, int skip, int thin) throws TraceException { try { BufferedReader outFileReader = new BufferedReader(new FileReader(outInputFile)); BufferedReader fastasFileReader = new BufferedReader(new FileReader(fastasInputFile)); BufferedReader treesFileReader = new BufferedReader(new FileReader(treesInputfileName)); //BufferedReader br = new BufferedReader(fileReader); String line; int count = 0; int printedCount = 0; String substModel = null; ArrayList<String> trees = new ArrayList<String>(); Hashtable<String, Boolean> translateHash = new Hashtable<String, Boolean>(); Hashtable<String, Boolean> taxaHash = new Hashtable<String, Boolean>(); //int discreteVariableIndex = -1; //Hashtable<String, Integer> hash = new Hashtable<String, Integer>(); line = outFileReader.readLine(); while(line != null) { if(line.matches("subst model\\d* = .*?")) { substModel = line.replaceFirst("subst model\\d* = ", "").replaceFirst("\\(.*\\/(.*)\\.dat\\)/\\(","").replaceFirst("\\.dat\\)", "").replaceAll(" \\+ ", "+"); line = outFileReader.readLine(); } else if(line.matches("iterations = \\d+")) { int iterations = Integer.parseInt(line.replaceFirst("iterations = ","")); if(iterations % 10 ==0) { //todo Alignment is only printed every 10 iterations. Figure out a proper way to manage this // parse the model here double likelihood = 0; String model = null; search: while((line=outFileReader.readLine()) != null) { if(line.matches("iterations = \\d+")) { break search; } else if (line.matches(".*?likelihood = -\\d+.*?")) { likelihood = Double.parseDouble(line.replaceFirst(".+likelihood = ", "") .replaceFirst("\\s.+", "")); } else if (line.matches("\\s+Heat:beta = .+")) { model = line.replaceAll(" = ", "=").replaceAll("::", "_").replaceAll("\\*", "") .replaceAll("^ ", "").replaceAll("\\s+", ","); } } String tree = treesFileReader.readLine(); // parse the sequence here String line2; int previousLineLength=1; search: while((line2=fastasFileReader.readLine()) != null) { if(line2.matches("iterations = \\d+")) { int fastaIterations = Integer.parseInt(line2.replaceFirst("iterations = ","")); if(fastaIterations!=iterations) { throw new RuntimeException("Iterations is not the same: " + fastaIterations + ", " + iterations); } } else if(line2.length()==0 && previousLineLength==0) { break search; } else if(line2.matches(">.+")) { String sequence = fastasFileReader.readLine(); String sequenceName = line2.replaceFirst(">","").replaceFirst("\\s+.+",""); if(tree.matches(".*\\(\\s*"+sequenceName+":.+")) { taxaHash.put(sequenceName, false); tree = tree.replaceFirst("\\(\\s*"+sequenceName+":", "("+sequenceName+"[&states="+sequence+"]:"); } else if(tree.matches(".*,\\s*"+sequenceName+":.+")) { taxaHash.put(sequenceName, false); tree = tree.replaceFirst(",\\s*"+sequenceName+":", ","+sequenceName+"[&states="+sequence+"]:"); } else if(tree.matches(".*\\)\\s*"+sequenceName+":.+")) { // This one is for internal nodes tree = tree.replaceFirst("\\)\\s*"+sequenceName+":", ")"+sequenceName+"[&states="+sequence+"]:"); } else if(tree.matches(".*\\)\\s*"+sequenceName+";.*")) { // This one is for internal nodes (root node) tree = tree.replaceFirst("\\)\\s*"+sequenceName+";", ")"+sequenceName+"[&states="+sequence+"];"); } else { throw new RuntimeException("Sequence name " + sequenceName + " does not exist in tree " + tree); } translateHash.put(sequenceName, false); } previousLineLength = line2.length(); } if((count % thin)==0 && count >= skip) { trees.add("tree STATE_"+iterations+" [&lnL="+likelihood+ ",subst="+substModel+"," + model +"] = [&U] "+ tree); printedCount++; } count++; } else { line = outFileReader.readLine(); } } else { line = outFileReader.readLine(); } } outFileReader.close(); report(trees, translateHash.keySet().toArray(new String[translateHash.size()]), taxaHash.keySet().toArray(new String[taxaHash.size()])); } catch (IOException e) { System.err.println("Error Parsing Input log: " + e.getMessage()); } } /* * Prints the output */ private void report(ArrayList<String> trees, String[] translateList, String[] taxaList) { Arrays.sort(taxaList); Arrays.sort(translateList); System.out.println("#NEXUS\n\nBegin taxa;"); System.out.println("\tDimensions ntaxa=" + taxaList.length + ";\n\tTaxlabels"); for(String taxaName : taxaList) { System.out.println("\t\t"+taxaName); } System.out.println("\t\t;\nEnd;\b\bBegin trees;\n\tTranslate"); for(int i=0; i<translateList.length; i++) { System.out.println("\t\t"+(i+1)+" "+translateList[i]); } for(String tree : trees) { for(int i=0; i<translateList.length; i++) { int index = -1; if((index = tree.indexOf("("+translateList[i]+"[")) > -1 ) { tree = tree.substring(0, index) + "(" + (i+1) + "[" + tree.substring(index+translateList[i].length()+2, tree.length()); } else if((index = tree.indexOf(","+translateList[i]+"[")) > -1 ) { tree = tree.substring(0, index) + "," + (i+1) + "[" + tree.substring(index+translateList[i].length()+2, tree.length()); } else if((index = tree.indexOf(")"+translateList[i]+"[")) > -1 ) { // This one is for internal nodes tree = tree.substring(0, index) + ")" + (i+1) + "[" + tree.substring(index+translateList[i].length()+2, tree.length()); } //tree.replaceFirst(translateList[i]+"\\[", String.valueOf(i)+"["); } System.out.println(tree); } System.out.println("End;"); } public static void printTitle() { System.out.println(); centreLine("MakeAncestralSequenceAnnotatedTree " + version.getVersionString() + ", " + version.getDateString(), 60); centreLine("BAli-Phy MCMC Output analysis", 60); centreLine("by", 60); centreLine("Wai Lok Sibon Li and Marc A. Suchard", 60); System.out.println(); centreLine("David Geffen School of Medicine", 60); centreLine("University of California, Los Angeles", 60); centreLine("sibonli@ucla.edu", 60); centreLine("and", 60); centreLine("msuchard@ucla.edu",60); System.out.println(); System.out.println(); } public static void centreLine(String line, int pageWidth) { int n = pageWidth - line.length(); int n1 = n / 2; for (int i = 0; i < n1; i++) { System.out.print(" "); } System.out.println(line); } public static void printUsage(Arguments arguments) { arguments.printUsage("makeancestralsequenceannotatedtree", "[<*.out file-name> <*.fastas file-name> <*.trees file-name> [<output-file-name>]]"); System.out.println(); System.out.println(" Example: ..."); System.out.println(" Example: ..."); System.out.println(); } /** * Main method */ public static void main(String[] args) throws IOException, TraceException { printTitle(); Arguments arguments = new Arguments( new Arguments.Option[]{ new Arguments.IntegerOption("thin", "thin"), new Arguments.IntegerOption("skip", "skip"), new Arguments.Option("help", "option to print this message") }); try { arguments.parseArguments(args); } catch (Arguments.ArgumentException ae) { System.out.println(ae); printUsage(arguments); System.exit(1); } if (arguments.hasOption("help")) { printUsage(arguments); System.exit(0); } int skip = -1; if (arguments.hasOption("skip")) { skip = arguments.getIntegerOption("skip"); } int thin = -1; if (arguments.hasOption("thin")) { thin = arguments.getIntegerOption("thin"); } // String discreteVariableName = null; // if (arguments.hasOption("discreteVariable")) { // discreteVariableName = arguments.getStringOption("discreteVariable"); // } String outInputFileName = null; String fastasInputFileName = null; String treesInputfileName = null; String outputFileName = null; String[] args2 = arguments.getLeftoverArguments(); if (args2.length > 4) { System.err.println("Unknown option: " + args2[2]); System.err.println(); printUsage(arguments); System.exit(1); } if (args2.length > 0) { outInputFileName = args2[0]; } if (args2.length > 0) { fastasInputFileName = args2[1]; } if (args2.length > 0) { treesInputfileName = args2[2]; } if (args2.length > 3) { outputFileName = args2[3]; } if (outInputFileName == null) { // No input file name was given so throw up a dialog box... outInputFileName = Utils.getLoadFileName("MakeAncestralSequenceAnnotatedTree " + version.getVersionString() + " - Select *.out file to analyse"); } if (fastasInputFileName == null) { // No input file name was given so throw up a dialog box... fastasInputFileName = Utils.getLoadFileName("MakeAncestralSequenceAnnotatedTree " + version.getVersionString() + " - Select *.fastas file to analyse"); } if (treesInputfileName == null) { // No input file name was given so throw up a dialog box... treesInputfileName = Utils.getLoadFileName("MakeAncestralSequenceAnnotatedTree " + version.getVersionString() + " - Select *.trees file to analyse"); } if(skip==-1) { System.out.println("Enter skip: "); BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); skip = Integer.parseInt(br.readLine()); } if(thin==-1) { System.out.println("Enter thin: "); BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); thin = Integer.parseInt(br.readLine()); } // if(discreteVariableName==null) { // System.out.println("Enter name of discrete variable: "); // BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); // discreteVariableName = br.readLine(); // } new MakeAncestralSequenceAnnotatedTree(thin, skip, outInputFileName, fastasInputFileName, treesInputfileName, outputFileName/*, discreteVariableName, !shortReport, hpds, ess, stdErr, marginalLikelihood*/); System.exit(0); } }