/*
* GenerateRelaxedClockXMLByData.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.app.tools;
import dr.app.beauti.util.XMLWriter;
import java.io.*;
/**
* Given data file format is
* 1st line : anything which is ignored
* 2nd line : one taxon name separator (e.g. space or tab) its sequence data
* 3rd line : ...
*
* @author Walter Xie
*/
public class GenerateRelaxedClockXMLByData {
static final String path = "/Users/local/EC/dxie004/Documents/BEAST1Release/RR_Datasets/";
static final String startingTree = "(((((Ssc:65,Bta:65):16,((Cfa:46,Fca:46):28,Eca:74):7):11," +
"(((Rno:20,Mmu:20):65,Ocu:85):5,(((Hsa:5,Ptr:5):5,Ppy:10):13,Mml:23):67):2):81,Tvu1:173):137,Gga:310);";
static final double rescaleHeight = 0.5;
static public void main(String[] args) {
int taxonNum = 14;
for (int c = 1; c <= 100; c++) {
String inputFileName = "geneRRNP" + c + ".phy";
String[][] data = new String[taxonNum][2];
try {
System.out.println("Input data from " + path + inputFileName + "\n\n");
FileReader fileReader = new FileReader(path + inputFileName);
LineNumberReader lineNumberReader = new LineNumberReader(fileReader);
String line = "";
int lineNum = 0;
while ((line = lineNumberReader.readLine()) != null) {
lineNum = lineNumberReader.getLineNumber();
System.out.println("Line: " + lineNum + ": " + line);
if (lineNumberReader.getLineNumber() > 1) { // not need 1st row
String[] d = line.split("\\s+");
if (d.length != 2) throw new IOException("d " + d.length);
data[lineNum - 2][0] = d[0]; // taxon name
data[lineNum - 2][1] = d[1]; // alignment
}
}
if (lineNum != 15) throw new IOException("not have 15 line " + lineNum);
fileReader.close();
String outputFileName = "geneRRNP" + c + ".xml";
System.out.println("\n\nCreating xml : " + path + outputFileName);
XMLWriter w = new XMLWriter(new BufferedWriter(new FileWriter(new File(path + outputFileName))));
writeData(w, data);
writeRestXML(w, outputFileName);
w.close();
System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++\n\n");
} catch (FileNotFoundException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
} catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
}
}
private static void writeRestXML(XMLWriter w, String outputFileName) throws IOException {
w.writeText("\n\t<!-- a Birth-Death speciation process (Gernhard 2008). -->\n" +
"\t<birthDeathModel id=\"birthDeath\" units=\"substitutions\">\n" +
"\t\t<birthMinusDeathRate>\n" +
"\t\t\t<parameter id=\"birthDiffRate\" value=\"1.0\" lower=\"0.0\" upper=\"1000000.0\"/>\n" +
"\t\t</birthMinusDeathRate>\n" +
"\t\t<relativeDeathRate>\n" +
"\t\t\t<parameter id=\"relativeDeathRate\" value=\"0.5\" lower=\"0.0\" upper=\"1.0\"/>\n" +
"\t\t</relativeDeathRate>\n" +
"\t</birthDeathModel>\n");
// w.writeText("\n\t<constantSize id=\"constant\" units=\"substitutions\">\n" +
// "\t\t<populationSize>\n" +
// "\t\t\t<parameter id=\"popSize\" value=\"0.077\" lower=\"0.0\" upper=\"Infinity\"/>\n" +
// "\t\t</populationSize>\n" +
// "\t</constantSize>\n");
w.flush();
w.writeText("\n" +
"\t<!-- Generate a random starting tree under the coalescent process -->\n" +
"\t<newick id=\"startingTree\" rescaleHeight=\"" + rescaleHeight + "\">\n");
w.writeText(startingTree);
w.writeText("\n" + "\t</newick>\n");
// w.writeText("\n<!-- Construct a rough-and-ready UPGMA tree as an starting tree -->\n" +
// "\t<upgmaTree id=\"startingTree\">\n" +
// "\t\t<distanceMatrix correction=\"JC\">\n" +
// "\t\t\t<patterns>\n"+
// "\t\t\t\t<alignment idref=\"alignment\"/>\n" +
// "\t\t\t</patterns>\n" +
// "\t\t</distanceMatrix>\n" +
// "\t</upgmaTree>\n");
w.flush();
w.writeText("\n" +
"\t<!-- Generate a tree model -->\n" +
"\t<treeModel id=\"treeModel\">\n" +
"\t\t<coalescentTree idref=\"startingTree\"/>\n" +
"\t\t<rootHeight>\n" +
"\t\t\t<parameter id=\"treeModel.rootHeight\"/>\n" +
"\t\t</rootHeight>\n" +
"\t\t<nodeHeights internalNodes=\"true\">\n" +
"\t\t\t<parameter id=\"treeModel.internalNodeHeights\"/>\n" +
"\t\t</nodeHeights>\n" +
"\t\t<nodeHeights internalNodes=\"true\" rootNode=\"true\">\n" +
"\t\t\t<parameter id=\"treeModel.allInternalNodeHeights\"/>\n" +
"\t\t</nodeHeights>\n" +
"\t</treeModel>\n");
w.flush();
// w.writeText("\n<!-- Generate a coalescent likelihood -->\n" +
// "\t<coalescentLikelihood id=\"coalescent\">\n" +
// "\t\t<model>\n" +
// "\t\t\t<constantSize idref=\"constant\"/>\n" +
// "\t\t</model>\n" +
// "\t\t<populationTree>\n" +
// "\t\t\t<treeModel idref=\"treeModel\"/>\n" +
// "\t\t</populationTree>\n" +
// "\t</coalescentLikelihood>\n");
w.writeText("\n" +
"\t<!-- Generate a speciation likelihood for Yule or Birth Death -->\n" +
"\t<speciationLikelihood id=\"speciation\">\n" +
"\t\t<model>\n" +
"\t\t\t<birthDeathModel idref=\"birthDeath\"/>\n" +
"\t\t</model>\n" +
"\t\t<speciesTree>\n" +
"\t\t\t<treeModel idref=\"treeModel\"/>\n" +
"\t\t</speciesTree>\n" +
"\t</speciationLikelihood>\n");
w.writeText("\n<!-- The uncorrelated relaxed clock (Drummond, Ho, Phillips & Rambaut (2006) PLoS Biology 4, e88 )-->\n" +
"\t<discretizedBranchRates id=\"branchRates\">\n" +
"\t\t<treeModel idref=\"treeModel\"/>\n" +
"\t\t<distribution>\n" +
"\t\t\t<logNormalDistributionModel meanInRealSpace=\"true\">\n" +
"\t\t\t\t<mean>\n" +
"\t\t\t\t\t<parameter id=\"ucld.mean\" value=\"1.0\" lower=\"0.0\" upper=\"Infinity\"/>\n" +
"\t\t\t\t</mean>\n" +
"\t\t\t\t<stdev>\n" +
"\t\t\t\t\t<parameter id=\"ucld.stdev\" value=\"0.3333333333333333\" lower=\"0.0\" upper=\"Infinity\"/>\n" +
"\t\t\t\t</stdev>\n" +
"\t\t\t</logNormalDistributionModel>\n" +
"\t\t</distribution>\n" +
"\t\t<rateCategories>\n" +
"\t\t\t<parameter id=\"branchRates.categories\" dimension=\"10\"/>\n" +
"\t\t</rateCategories>\n" +
"\t</discretizedBranchRates>\n");
// w.writeText("\n\t<rateStatistic id=\"meanRate\" name=\"meanRate\" mode=\"mean\" internal=\"true\" external=\"true\">\n" +
// "\t\t<treeModel idref=\"treeModel\"/>\n" +
// "\t\t<discretizedBranchRates idref=\"branchRates\"/>\n" +
// "\t</rateStatistic>\n" +
// "\t<rateStatistic id=\"coefficientOfVariation\" name=\"coefficientOfVariation\" mode=\"coefficientOfVariation\" internal=\"true\" external=\"true\">\n" +
// "\t\t<treeModel idref=\"treeModel\"/>\n" +
// "\t\t<discretizedBranchRates idref=\"branchRates\"/>\n" +
// "\t</rateStatistic>\n" +
// "\t<rateCovarianceStatistic id=\"covariance\" name=\"covariance\">\n" +
// "\t\t<treeModel idref=\"treeModel\"/>\n" +
// "\t\t<discretizedBranchRates idref=\"branchRates\"/>\n" +
// "\t</rateCovarianceStatistic>\n");
w.writeText("\t<!-- The HKY substitution model (Hasegawa, Kishino & Yano, 1985) -->\n" +
"\t<HKYModel id=\"hky\">\n" +
"\t\t<frequencies>\n" +
"\t\t\t<frequencyModel dataType=\"nucleotide\">\n" +
"\t\t\t\t<frequencies>\n" +
"\t\t\t\t\t<parameter id=\"hky.frequencies\" value=\"0.25 0.25 0.25 0.25\"/>\n" +
"\t\t\t\t</frequencies>\n" +
"\t\t\t</frequencyModel>\n" +
"\t\t</frequencies>\n" +
"\t\t<kappa>\n" +
"\t\t\t<parameter id=\"hky.kappa\" value=\"2.0\" lower=\"0.0\" upper=\"Infinity\"/>\n" +
"\t\t</kappa>\n" +
"\t</HKYModel>\n" +
"\n" +
"\t<!-- site model -->\n" +
"\t<siteModel id=\"siteModel\">\n" +
"\t\t<substitutionModel>\n" +
"\t\t\t<HKYModel idref=\"hky\"/>\n" +
"\t\t</substitutionModel>\n" +
"\t</siteModel>\n");
w.writeText("\t<!-- Likelihood for tree given sequence data -->\n" +
"\t<treeLikelihood id=\"treeLikelihood\" useAmbiguities=\"false\">\n" +
"\t\t<patterns idref=\"patterns\"/>\n" +
"\t\t<treeModel idref=\"treeModel\"/>\n" +
"\t\t<siteModel idref=\"siteModel\"/>\n" +
"\t\t<discretizedBranchRates idref=\"branchRates\"/> \n" +
"\t</treeLikelihood>\n");
w.flush();
w.writeText("\n" + "\t<!-- Define operators -->\n" +
"\t<operators id=\"operators\">\n");
w.writeText("\t\t<scaleOperator scaleFactor=\"0.75\" weight=\"0.1\">\n" +
"\t\t\t<parameter idref=\"hky.kappa\"/>\n" +
"\t\t</scaleOperator>\n" +
"\t\t<deltaExchange delta=\"0.01\" weight=\"0.4\">\n" +
"\t\t\t<parameter idref=\"hky.frequencies\"/>\n" +
"\t\t</deltaExchange>\n" +
// "\t\t<subtreeSlide size=\"0.0077\" gaussian=\"true\" weight=\"15\">\n" +
// "\t\t\t<treeModel idref=\"treeModel\"/>\n" +
// "\t\t</subtreeSlide>\n" +
// "\t\t<narrowExchange weight=\"15\">\n" +
// "\t\t\t<treeModel idref=\"treeModel\"/>\n" +
// "\t\t</narrowExchange>\n" +
// "\t\t<wideExchange weight=\"3\">\n" +
// "\t\t\t<treeModel idref=\"treeModel\"/>\n" +
// "\t\t</wideExchange>\n" +
// "\t\t<wilsonBalding weight=\"3\">\n" +
// "\t\t\t<treeModel idref=\"treeModel\"/>\n" +
// "\t\t</wilsonBalding>\n" +
"\t\t<scaleOperator scaleFactor=\"0.75\" weight=\"1\">\n" +
"\t\t\t<parameter idref=\"treeModel.rootHeight\"/>\n" +
"\t\t</scaleOperator>\n" +
"\t\t<uniformOperator weight=\"12\">\n" +
"\t\t\t<parameter idref=\"treeModel.internalNodeHeights\"/>\n" +
"\t\t</uniformOperator>\n" +
// "\t\t<scaleOperator scaleFactor=\"0.75\" weight=\"3\">\n" +
// "\t\t\t<parameter idref=\"popSize\"/>\n" +
// "\t\t</scaleOperator>\n" +
"\t\t<scaleOperator scaleFactor=\"0.75\" weight=\"0.1\">\n" +
"\t\t\t<parameter idref=\"birthDiffRate\"/>\n" +
"\t\t</scaleOperator>\n" +
"\t\t<scaleOperator scaleFactor=\"0.75\" weight=\"0.1\">\n" +
"\t\t\t<parameter idref=\"relativeDeathRate\"/>\n" +
"\t\t</scaleOperator>\n" +
"\t\t<upDownOperator scaleFactor=\"0.75\" weight=\"13\">\n" +
"\t\t\t<up>\n" +
"\t\t\t</up>\n" +
"\t\t\t<down>\n" +
"\t\t\t\t<parameter idref=\"treeModel.allInternalNodeHeights\"/>\n" +
"\t\t\t</down>\n" +
"\t\t</upDownOperator>\n" +
"\t\t<scaleOperator scaleFactor=\"0.75\" weight=\"3\">\n" +
"\t\t\t<parameter idref=\"ucld.stdev\"/>\n" +
"\t\t</scaleOperator> \n" +
"\t\t<uniformIntegerOperator weight=\"13\">\n" +
"\t\t\t<parameter idref=\"branchRates.categories\"/>\n" +
"\t\t</uniformIntegerOperator>\n" +
"\t\t<swapOperator size=\"1\" weight=\"13\" autoOptimize=\"false\">\n" +
"\t\t\t<parameter idref=\"branchRates.categories\"/>\n" +
"\t\t</swapOperator>\n");
w.writeText("\t</operators>");
w.flush();
w.writeText("\n" +
"\t<!-- Define MCMC -->\n" +
"\t<mcmc id=\"mcmc\" chainLength=\"1000000\" autoOptimize=\"true\" " +
"operatorAnalysis=\"" + outputFileName + ".ops\">\n" +
"\t\t<posterior id=\"posterior\">\n" +
"\t\t\t<prior id=\"prior\">\n" +
"\t\t\t\t<logNormalPrior mean=\"1.0\" stdev=\"1.25\" offset=\"0.0\" meanInRealSpace=\"false\">\n" +
"\t\t\t\t\t<parameter idref=\"hky.kappa\"/>\n" +
"\t\t\t\t</logNormalPrior>\n" +
"\t\t\t\t<uniformPrior lower=\"0.0\" upper=\"1.0\">\n" +
"\t\t\t\t\t<parameter idref=\"hky.frequencies\"/>\n" +
"\t\t\t\t</uniformPrior>\n" +
"\t\t\t\t<exponentialPrior mean=\"0.3333333333333333\" offset=\"0.0\">\n" +
"\t\t\t\t\t<parameter idref=\"ucld.stdev\"/>\n" +
"\t\t\t\t</exponentialPrior> \n" +
// "\t\t\t\t<oneOnXPrior>\n" +
// "\t\t\t\t\t<parameter idref=\"popSize\"/>\n" +
// "\t\t\t\t</oneOnXPrior> \n" +
// "\t\t\t\t<coalescentLikelihood idref=\"coalescent\"/>\n");
"\t\t\t<speciationLikelihood idref=\"speciation\"/>\n");
w.writeText("\n" +
"\t\t\t</prior>\n" +
"\t\t\t<likelihood id=\"likelihood\">\n" +
"\t\t\t\t<treeLikelihood idref=\"treeLikelihood\"/>\n" +
"\t\t\t</likelihood>\n" +
"\t\t</posterior>\n" +
"\t\t<operators idref=\"operators\"/>\n");
w.flush();
w.writeText("\n" +
"\t\t<!-- write log to screen -->\n" +
"\t\t<log id=\"screenLog\" logEvery=\"100000\">\n" +
"\t\t\t<column label=\"Posterior\" dp=\"4\" width=\"12\">\n" +
"\t\t\t\t<posterior idref=\"posterior\"/>\n" +
"\t\t\t</column>\n" +
// "\t\t\t<column label=\"Prior\" dp=\"4\" width=\"12\">\n" +
// "\t\t\t\t<prior idref=\"prior\"/>\n" +
// "\t\t\t</column>\n" +
"\t\t\t<column label=\"Likelihood\" dp=\"4\" width=\"12\">\n" +
"\t\t\t\t<likelihood idref=\"likelihood\"/>\n" +
"\t\t\t</column>\n" +
"\t\t\t<column label=\"rootHeight\" sf=\"6\" width=\"12\">\n" +
"\t\t\t\t<parameter idref=\"treeModel.rootHeight\"/>\n" +
"\t\t\t</column>\n" +
// "\t\t\t<parameter idref=\"ucld.mean\"/>\n" +
"\t\t</log>\n"
);
w.writeText("\t\t<!-- write log to file -->\n" +
"\t\t<log id=\"fileLog\" logEvery=\"100\" fileName=\"" + outputFileName + ".log\">\n" +
"\t\t\t<posterior idref=\"posterior\"/>\n" +
"\t\t\t<prior idref=\"prior\"/>\n" +
"\t\t\t<treeLikelihood idref=\"treeLikelihood\"/>\n" +
// "\t\t\t<coalescentLikelihood idref=\"coalescent\"/>\n" +
"\t\t\t<speciationLikelihood idref=\"speciation\"/>\n" +
// "\t\t\t<parameter idref=\"popSize\"/>\n" +
"\t\t\t<parameter idref=\"birthDiffRate\"/>\n" +
"\t\t\t<parameter idref=\"relativeDeathRate\"/>\n" +
"\t\t\t<parameter idref=\"treeModel.rootHeight\"/>\n" +
"\t\t\t<parameter idref=\"hky.kappa\"/>\n" +
"\t\t\t<parameter idref=\"hky.frequencies\"/>\n" +
"\t\t\t<parameter idref=\"ucld.mean\"/>\n" +
"\t\t\t<parameter idref=\"ucld.stdev\"/>\n" +
"\t\t\t<parameter idref=\"branchRates.categories\"/> \n");
w.writeText("\t\t</log>\n");
w.writeText("\t\t<logTree id=\"treeFileLog\" logEvery=\"10000\" nexusFormat=\"true\" " +
"fileName=\"" + outputFileName + ".trees\" sortTranslationTable=\"true\">\n" +
"\t\t\t<treeModel idref=\"treeModel\"/>\n" +
"\t\t\t<discretizedBranchRates idref=\"branchRates\"/> \n" +
"\t\t\t<posterior idref=\"posterior\"/>\n" +
"\t\t</logTree>\n");
w.writeText("\t</mcmc>\n" +
"\t<report>\n" +
"\t\t<property name=\"timer\">\n" +
"\t\t\t<mcmc idref=\"mcmc\"/>\n" +
"\t\t</property>\n" +
"\t</report>\n" +
"</beast>\n");
w.flush();
}
private static void writeData(XMLWriter w, String[][] tips) throws IOException {
w.writeText("<?xml version=\"1.0\" standalone=\"yes\"?>\n" + "\n" +
"<!-- Generated by BEAUTi v1.6.2 -->\n" +
"<!-- by Alexei J. Drummond and Andrew Rambaut -->\n" +
"<!-- Department of Computer Science, University of Auckland and -->\n" +
"<!-- Institute of Evolutionary Biology, University of Edinburgh -->\n" +
"<!-- http://beast.bio.ed.ac.uk/ -->\n" +
"<beast>\n" + "\n" +
"\t<!-- The list of taxa to be analysed (can also include dates/ages). -->\n" +
"\t<!-- ntax=" + tips.length + " -->\n");
w.writeText("\t<taxa id=\"taxa\">\n");
for (int n = 0; n < tips.length; n++) {
w.writeText("\t\t<taxon id=\"" + tips[n][0] + "\"/>\n");
}
w.writeText("\t</taxa>\n");
w.flush();
w.writeText("\t<alignment id=\"alignment\" dataType=\"nucleotide\">\n");
for (int n = 0; n < tips.length; n++) {
w.writeText("\t\t<sequence>\n");
w.writeText("\t\t<taxon idref=\"" + tips[n][0] + "\"/>\n");
w.writeText(tips[n][1]);
w.writeText("\t\t</sequence>\n");
w.flush();
}
w.writeText("\t</alignment>\n");
w.writeText("\n\t<patterns id=\"patterns\" from=\"1\">\n" +
"\t\t<alignment idref=\"alignment\"/>\n" +
"\t</patterns>");
}
}