package org.seqcode.genome.sequence;
import java.io.IOException;
import java.text.ParseException;
import java.util.Random;
import org.seqcode.data.io.BackgroundModelIO;
import org.seqcode.data.motifdb.MarkovBackgroundModel;
import org.seqcode.genome.Genome;
import org.seqcode.genome.Species;
import org.seqcode.gseutils.ArgParser;
import org.seqcode.gseutils.Args;
import org.seqcode.gseutils.NotFoundException;
import org.seqcode.gseutils.Pair;
public class RandomSequenceGenerator {
private MarkovBackgroundModel markov;
private int modelLen;
private Random rand = new Random();
public static void main(String[] args) throws IOException, ParseException, NotFoundException {
ArgParser ap = new ArgParser(args);
if(!ap.hasKey("len") || !ap.hasKey("num")||!ap.hasKey("markov") || !ap.hasKey("species")) {
System.err.println("Usage:\n" +
"RandomSequenceGenerator " +
"--len <length of sequences> " +
"--num <number of sequences> " +
"--markov <markov model file> " +
"--species <organism;genome> ");
return;
}
int length = new Integer(ap.getKeyValue("len")).intValue();
int number = new Integer(ap.getKeyValue("num")).intValue();
String mFile = ap.getKeyValue("markov");
Pair<Species, Genome> pair = Args.parseGenome(args);
MarkovBackgroundModel model = BackgroundModelIO.parseMarkovBackgroundModel(mFile, pair.cdr());
RandomSequenceGenerator gen = new RandomSequenceGenerator(model);
for(int i=1; i<=number; i++){
String s = gen.execute(length);
System.out.println(">Sequence"+i);
System.out.println(s);
}
}
public RandomSequenceGenerator(MarkovBackgroundModel m){
markov=m;
modelLen = markov.getMaxKmerLen();
}
public String execute(int len){
String seq = new String();
//Preliminary bases
for(int i=1; i<modelLen && i<=len; i++){
double prob = rand.nextDouble();
double sum=0; int j=0;
while(sum<prob){
String test = seq.concat(int2base(j));
sum += markov.getMarkovProb(test);
if(sum>=prob){
seq = test;
break;
}
j++;
}
}
//Remaining bases
for(int i=modelLen; i<=len; i++){
String lmer = seq.substring(seq.length()-(modelLen-1));
double prob = rand.nextDouble();
double sum=0; int j=0;
while(sum<prob){
String test = lmer.concat(int2base(j));
sum += markov.getMarkovProb(test);
if(sum>=prob){
seq =seq.concat(int2base(j));
break;
}
j++;
}
}
return seq;
}
protected String int2base(int x){
String c;
switch(x){
case 0:
c="A"; break;
case 1:
c="C"; break;
case 2:
c="G"; break;
case 3:
c="T"; break;
default:
c="N";
}
return(c);
}
}