/* * VariableCoalescentSimulator.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.app.vcs; import dr.evolution.coalescent.CoalescentSimulator; import dr.evolution.coalescent.PiecewiseLinearPopulation; import dr.evolution.tree.Tree; import dr.evolution.tree.TreeUtils; import dr.evolution.util.Date; import dr.evolution.util.Taxa; import dr.evolution.util.Taxon; import dr.evolution.util.Units; import ml.options.Options; import java.io.*; import java.util.ArrayList; import java.util.List; /** * This program simulates a set of coalescent trees from an arbitrary variable population size history expressed as * a piecewise-linear function. * <p/> * Usage: * java -jar vcs.jar [options] <infile> <outfile> * <p/> * Options: * <ul> * <li> <b> -g <genTime> </b> sets the generation time to the given value. This is used to scale * times in the input file into units of generations. * The default value is 1.0 * <li> <b> -n <numSamples> </b> sets the number of samples to generate the trees from. * The default value is 50 * <li> <b> -p <popScale> </b> sets the population size scale factor to the given value. This scale * factor transforms the population sizes in the input file to * effective population sizes. This is useful if the population size * profiles are expressed, for example, as prevalences, so then scale * factor would represent the effective numbers of hosts in the * population of interest. * The default value is 1.0 * <li> <b> -se <sampleEnd> </b> the time at which last taxa is sampled, in the time scale provided by the input * file. If this differs from the time specified be -ss option then the * samples will be evenly spaced between the two times. * The default value is the last time if -f is set and the first time * otherwise. * <li> <b> -ss <sampleStart> </b> the time at which first taxa is sampled, in the time scale provided by the input * file. If this differs from the time specified be -se option then the * samples will be evenly spaced between the two times. * The default value is the last time if -f is set and the first time * otherwise. * <li> <b> -f </b> specifies that the population size history proceeds forward in time. * Otherwise the population size history is assumed to proceed into the past. <br> * <li> <b> -reps <reps> </b> the number of replicate simulations that will be performed. * Each replicate will be separated in the output file by a comment line that labels the replicate, e.g. #rep 0. * </ul> * <p/> * <infile> a whitespace-delimited plain text file. The first column contains the time * and should be ascending from zero. Subsequent columns contain the population size * histories, for which a tree will be simulated for each. * <p/> * <outfile> the file to which the trees will be written in newick format. * * @author Alexei Drummond */ public class VariableCoalescentSimulator { public static void main(String[] arg) throws IOException { long startTime = System.currentTimeMillis(); Options options = new Options(arg, 0, 7); options.getSet().addOption("g", Options.Separator.EQUALS, Options.Multiplicity.ZERO_OR_ONE); options.getSet().addOption("n", Options.Separator.EQUALS, Options.Multiplicity.ZERO_OR_ONE); options.getSet().addOption("p", Options.Separator.EQUALS, Options.Multiplicity.ZERO_OR_ONE); options.getSet().addOption("se", Options.Separator.EQUALS, Options.Multiplicity.ZERO_OR_ONE); options.getSet().addOption("ss", Options.Separator.EQUALS, Options.Multiplicity.ZERO_OR_ONE); options.getSet().addOption("reps", Options.Separator.EQUALS, Options.Multiplicity.ZERO_OR_ONE); options.getSet().addOption("f", Options.Multiplicity.ZERO_OR_ONE); if (!options.check()) { System.out.println(options.getCheckErrors()); System.out.println(); printUsage(); System.exit(1); } double generationTime = 1.0; double popSizeScale = 1.0; int n = 50; double ss = -1; double se = -1; int reps = 1; boolean timeForward = options.getSet().isSet("f"); if (options.getSet().isSet("g")) { String g = options.getSet().getOption("g").getResultValue(0); generationTime = Double.parseDouble(g); System.out.println("generation time = " + g); } if (options.getSet().isSet("n")) { String sampleSize = options.getSet().getOption("n").getResultValue(0); n = Integer.parseInt(sampleSize); System.out.println("sample size = " + n); } if (options.getSet().isSet("p")) { String p = options.getSet().getOption("p").getResultValue(0); popSizeScale = Double.parseDouble(p); System.out.println("population size scale factor = " + p); } if (options.getSet().isSet("ss")) { String sampleStart = options.getSet().getOption("ss").getResultValue(0); ss = Double.parseDouble(sampleStart); System.out.println("sample start time = " + ss); } if (options.getSet().isSet("se")) { String sampleEnd = options.getSet().getOption("se").getResultValue(0); se = Double.parseDouble(sampleEnd); System.out.println("sample end time = " + se); } if (options.getSet().isSet("reps")) { String replicates = options.getSet().getOption("reps").getResultValue(0); reps = Integer.parseInt(replicates); System.out.println("replicates = " + reps); } String filename = options.getSet().getData().get(0); String outfile = options.getSet().getData().get(1); // READ DEMOGRAPHIC FUNCTION BufferedReader reader = new BufferedReader(new FileReader(filename)); List<Double> times = new ArrayList<Double>(); String line = reader.readLine(); String[] tokens = line.trim().split("[\t ]+"); if (tokens.length < 2) throw new RuntimeException(); ArrayList<ArrayList> popSizes = new ArrayList<ArrayList>(); while (line != null) { double time = Double.parseDouble(tokens[0]) / generationTime; times.add(time); for (int i = 1; i < tokens.length; i++) { popSizes.add(new ArrayList<Double>()); popSizes.get(i - 1).add(Double.parseDouble(tokens[i])); } line = reader.readLine(); if (line != null) { tokens = line.trim().split("[\t ]+"); if (tokens.length != popSizes.size() + 1) throw new RuntimeException(); } } reader.close(); // GENERATE SAMPLE double lastTime = times.get(times.size() - 1); if (ss == -1) { ss = timeForward ? lastTime : times.get(0); } if (se == -1) { se = timeForward ? lastTime : times.get(0); } double dt = (se - ss) / ((double) n - 1.0); double time = ss; Taxa taxa = new Taxa(); for (int i = 0; i < n; i++) { double sampleTime; if (timeForward) { sampleTime = (lastTime - time) / generationTime; } else sampleTime = time / generationTime; Taxon taxon = new Taxon(i + ""); taxon.setAttribute(dr.evolution.util.Date.DATE, new Date(sampleTime, Units.Type.GENERATIONS, true)); taxa.addTaxon(taxon); time += dt; } double minTheta = Double.MAX_VALUE; double maxTheta = 0.0; PrintWriter out = new PrintWriter(new FileWriter(outfile)); int popHistory = 0; PiecewiseLinearPopulation[] demography = new PiecewiseLinearPopulation[popSizes.size()]; for (List<Double> popSize : popSizes) { double[] thetas = new double[popSize.size()]; double[] intervals = new double[times.size() - 1]; for (int i = intervals.length; i >= 0; i--) { int j = timeForward ? intervals.length - i : i - 1; int k = timeForward ? i : intervals.length - i + 1; if (i != 0) intervals[j] = times.get(k) - times.get(k - 1); double theta = popSize.get(k) * popSizeScale; thetas[j] = theta; if (theta < minTheta) { minTheta = theta; } if (theta > maxTheta) { maxTheta = theta; } //System.out.println(t + "\t" + theta); } System.out.println("N" + popHistory + "(t) range = [" + minTheta + ", " + maxTheta + "]"); demography[popHistory] = new PiecewiseLinearPopulation(intervals, thetas, Units.Type.GENERATIONS); popHistory += 1; } CoalescentSimulator simulator = new CoalescentSimulator(); for (int i = 0; i < reps; i++) { out.println("#rep " + i); for (int j = 0; j < demography.length; j++) { Tree tree = simulator.simulateTree(taxa, demography[j]); out.println(TreeUtils.newick(tree)); //System.err.println(Tree.Utils.newick(tree)); } } out.flush(); out.close(); long stopTime = System.currentTimeMillis(); System.out.println("Took " + (stopTime - startTime) / 1000.0 + " seconds"); } private static Taxa readSampleFile(String fileName, double generationTime) throws IOException { BufferedReader reader = new BufferedReader(new FileReader(fileName)); String line = reader.readLine(); Taxa taxa = new Taxa(); int id = 0; while (line != null) { if (!line.startsWith("#")) { String[] tokens = line.split("[\t ]+"); // sample times are in the same units as simulation double sampleTime = Double.parseDouble(tokens[0]) / generationTime; int count = Integer.parseInt(tokens[1]); for (int i = 0; i < count; i++) { Taxon taxon = new Taxon(id + ""); taxon.setAttribute(dr.evolution.util.Date.DATE, new Date(sampleTime, Units.Type.GENERATIONS, true)); taxa.addTaxon(taxon); id += 1; } } line = reader.readLine(); } return taxa; } private static void printUsage() { System.out.println( "Usage: \n" + " java -jar vcs.jar [options] <infile> <outfile>\n" + " \n" + "Options:\n" + " -g <value> sets the generation time to the given value. This is used to scale\n" + " times in the input file into units of generations. \n" + " The default value is 1.0\n" + " -n <int> sets the number of samples to generate the trees from. \n" + " The default value is 50\n" + " -p <value> sets the population size scale factor to the given value. This scale \n" + " factor transforms the population sizes in the input file to\n" + " effective population sizes. This is useful if the population size\n" + " profiles are expressed, for example, as prevalences, so then scale\n" + " factor would represent the effective numbers of hosts in the\n" + " population of interest. \n" + " The default value is 1.0\n" + " -ss <value> the time at which first taxa is sampled, in the time scale provided by the input\n" + " file. If this differs from the time specified be -ss option then the \n" + " samples will be evenly spaced between the two times.\n" + " The default value is the last time if -f is set and the first time\n" + " otherwise.\n" + " -se <value> the time at which last taxa is sampled, in the time scale provided by the input\n" + " file. If this differs from the time specified be -se option then the \n" + " samples will be evenly spaced between the two times.\n" + " The default value is the last time if -f is set and the first time\n" + " otherwise.\n" + " -reps <reps> the number of replicate simulations that will be performed. \n" + " Each replicate will be separated in the output file by a comment line that\n" + " labels the replicate, e.g. #rep 0.\n" + " -f specifies that the population size history proceeds forward in time.\n" + " Otherwise the population size history is assumed to proceed into the past.\n" + "\n" + "<infile> A whitespace-delimited plain text file. The first column contains the time\n" + " and should be ascending from zero. Subsequent columns contain the population size\n" + " histories, for which a tree will be simulated for each.\n" + "<outfile> The file to which the trees will be written in newick format."); } }