package org.seqcode.math.randomgenerators;
import java.util.Arrays;
import java.util.Random;
import org.seqcode.math.stats.StatUtil;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.File;
import java.io.IOException;
/**
* This class will implement samplers of various distributions
* @author geopapa
*
*/
public class Sampler {
private static Random r;
/***********************
** GAUSSIAN SAMPLES **
***********************/
public static double[][] gauss_rnd(int[] data_size, double[] mu) { return gauss_rnd(data_size, mu, new double[data_size.length]); }
public static double[][] gauss_rnd(int[] data_size, double mu) { return gauss_rnd(data_size, mu); }
public static double[][] gauss_rnd(int num_rows, int num_cols, double mu) { return gauss_rnd(num_rows, num_cols, mu, 1.0); }
public static double[] gauss_rnd(int num_cols, double mu) { return gauss_rnd(num_cols, mu, 1.0); }
/**
* It generates gaussian samples for <tt>data_size.length</tt> data sets. <br>
* Each data set has data size <tt>data_size[t]</tt> with mean <tt>mu[t]</tt> and standard deviation <tt>std[t]</tt>.
* (where t in {0, ..., data_size.length-1}
* @param data_size the size of each data set
* @param mu the mean of the Gaussian
* @param std the standard deviation of the Gaussian
* @return
*/
public static double[][] gauss_rnd(int[] data_size, double[] mu, double[] std) {
double[][] samples = new double[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = gauss_rnd(data_size[t], mu[t], std[t]);
return samples;
}//end of gauss_rnd method
/**
* It generates gaussian samples of mean <tt>mu</tt> and standard deviation <tt>std</tt>
* for <tt>data_size.length</tt> data sets, each having data_size[t] points.
* (where t in {0, ..., data_size.length-1}
* @param data_size the size of each data set
* @param mu the mean of the Gaussian
* @param std the standard deviation of the Gaussian
* @return
*/
public static double[][] gauss_rnd(int[] data_size, double mu, double std) {
double[][] samples = new double[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = gauss_rnd(data_size[t], mu, std);
return samples;
}//end of gauss_rnd method
public static double[][] gauss_rnd(int num_rows, int num_cols, double mu, double std) {
double[][] samples = new double[num_rows][];
for(int t = 0; t < samples.length; t++)
samples[t] = gauss_rnd(num_cols, mu, std);
return samples;
}//end of gauss_rnd method
public static double[] gauss_rnd(int num_cols, double mu, double std) {
double[] samples = new double[num_cols];
for(int n = 0; n < samples.length; n++)
samples[n] = gauss_rnd(mu, std);
return samples;
}//end of gauss_rnd method
public static double gauss_rnd(double mu) { return gauss_rnd(mu, 1.0); }
/**
* It generates a gaussian sample of mean <tt>mu</tt> and standard deviation <tt>std</tt>.
* @param mu the mean of the gaussian
* @param std the standard deviation of the gaussian
* @return
*/
public static double gauss_rnd(double mu, double std) {
if(std < 0) { throw new IllegalArgumentException("Standard deviation must be non-negative."); }
if (r == null) initRNG();
double sample = std*r.nextGaussian() + mu;
return sample;
}//end of gauss_rnd method
private static synchronized void initRNG() {
if (r == null)
r = new Random();
}//end of initRNG method
/**********************************
** CONTINUOUS UNIFORM SAMPLES **
**********************************/
public static double[][] unif_rnd(int[] data_size, double[] lower_bound, double[] upper_bound) {
double[][] samples = new double[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = unif_rnd(data_size[t], lower_bound[t], upper_bound[t]);
return samples;
}//end of unif_rnd method
/**
* Gives <tt>num_rows</tt> data sets of continuous uniform samples, each having
* lower bound <tt>lower_bound[t]</tt> (inclusive) and upper bound <tt>upper_bound</tt> (exclusive).
* @param num_rows
* @param num_cols
* @param lower_bound
* @param upper_bound
* @return
*/
public static double[][] unif_rnd(int num_rows, int num_cols, double[] lower_bound, double[] upper_bound) {
double[][] samples = new double[num_rows][];
for(int t = 0; t < samples.length; t++)
samples[t] = unif_rnd(num_cols, lower_bound[t], upper_bound[t]);
return samples;
}//end of unif_rnd method
public static double[][] unif_rnd(int[] data_size, double lower_bound, double upper_bound) {
double[][] samples = new double[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = unif_rnd(data_size[t], lower_bound, upper_bound);
return samples;
}//end of unif_rnd method
public static double[][] unif_rnd(int num_rows, int num_cols, double lower_bound, double upper_bound) {
double[][] samples = new double[num_rows][];
for(int t = 0; t < samples.length; t++)
samples[t] = unif_rnd(num_cols, lower_bound, upper_bound);
return samples;
}//end of unif_rnd method
public static double[][] unif_rnd(int[] data_size) {
double[][] samples = new double[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = unif_rnd(data_size[t]);
return samples;
}//end of unif_rnd method
public static double[] unif_rnd(int num_cols) {
return unif_rnd(num_cols, 0.0, 1.0);
}//end of unif_rnd method
public static double[] unif_rnd(int num_cols, double lower_bound, double upper_bound) {
double[] samples = new double[num_cols];
for(int n = 0; n < samples.length; n++)
samples[n] = unif_rnd(lower_bound, upper_bound);
return samples;
}//end of unif_rnd method
/**
* Gives a continuous uniform sample in the interval between <tt>lower_bound</tt> (inclusive)
* and <tt>upper_bound</tt> (exclusive).
* @param lower_bound
* @param upper_bound
* @return
*/
public static double unif_rnd(double lower_bound, double upper_bound) {
if(upper_bound < lower_bound)
throw new IllegalArgumentException("upper_bound has to be greater lower_bound.");
double sample = Math.random();
double scale = upper_bound - lower_bound;
sample = scale*sample + lower_bound;
return sample;
}//end of unif_rnd method
/*******************************
** DISCRETE UNIFORM SAMPLES **
*******************************/
public static int[][] unif_rnd(int[] data_size, int[] lower_bound, int[] upper_bound) {
int[][] samples = new int[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = unif_rnd(data_size[t], lower_bound[t], upper_bound[t]);
return samples;
}//end of unif_rnd method
/**
* Gives <tt>num_rows</tt> data sets of continuous uniform samples, each having
* lower bound <tt>lower_bound[t]</tt> and upper bound <tt>upper_bound</tt>.
* @param num_rows
* @param num_cols
* @param lower_bound
* @param upper_bound
* @return
*/
public static int[][] unif_rnd(int num_rows, int num_cols, int[] lower_bound, int[] upper_bound) {
int[][] samples = new int[num_rows][];
for(int t = 0; t < samples.length; t++)
samples[t] = unif_rnd(num_cols, lower_bound[t], upper_bound[t]);
return samples;
}//end of unif_rnd method
public static int[][] unif_rnd(int[] data_size, int lower_bound, int upper_bound) {
int[][] samples = new int[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = unif_rnd(data_size[t], lower_bound, upper_bound);
return samples;
}//end of unif_rnd method
public static int[][] unif_rnd(int num_rows, int num_cols, int lower_bound, int upper_bound) {
int[][] samples = new int[num_rows][];
for(int t = 0; t < samples.length; t++)
samples[t] = unif_rnd(num_cols, lower_bound, upper_bound);
return samples;
}//end of unif_rnd method
public static int[] unif_rnd(int num_cols, int lower_bound, int upper_bound) {
int[] samples = new int[num_cols];
for(int n = 0; n < samples.length; n++)
samples[n] = unif_rnd(lower_bound, upper_bound);
return samples;
}//end of unif_rnd method
/**
* Gives a discrete uniform sample in the interval between <tt>lower_bound</tt> (inclusive)
* and <tt>upper_bound</tt> (exclusive).
* @param lower_bound
* @param upper_bound
* @return
*/
public static int unif_rnd(int lower_bound, int upper_bound) {
if(upper_bound < lower_bound)
throw new IllegalArgumentException("upper_bound has to be greater lower_bound.");
if (r == null) initRNG();
int width = upper_bound - lower_bound;
int sample = r.nextInt(width) + lower_bound;
return sample;
}//end of unif_rnd method
/*******************************
** BINOMIAL SAMPLES **
*******************************/
public static int[][] binom_rnd(int[] data_size, int[] N, double[] p) {
int[][] samples = new int[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = binom_rnd(data_size[t], N[t], p[t]);
return samples;
}//end of binom_rnd method
public static int[][] binom_rnd(int num_rows, int num_cols, int[] N, double[] p) {
int[][] samples = new int[num_rows][];
for(int t = 0; t < samples.length; t++)
samples[t] = binom_rnd(num_cols, N[t], p[t]);
return samples;
}//end of binom_rnd method
public static int[][] binom_rnd(int[] data_size, int N, double p) {
int[][] samples = new int[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = binom_rnd(data_size[t], N, p);
return samples;
}//end of binom_rnd method
public static int[][] binom_rnd(int num_rows, int num_cols, int N, double p) {
int[][] samples = new int[num_rows][];
for(int t = 0; t < samples.length; t++)
samples[t] = binom_rnd(num_cols, N, p);
return samples;
}//end of binom_rnd method
public static int[] binom_rnd(int num_cols, int N, double p) {
int[] samples = new int[num_cols];
for(int n = 0; n < samples.length; n++)
samples[n] = binom_rnd(N, p);
return samples;
}//end of binom_rnd method
/**
* This method creates a sample from 0 to N-1 following a Binomial distribution, Bin(N, p), using the algorithm
* by Devroye taken from: <br>
* <a href="http://delivery.acm.org/10.1145/50000/42381/p216-kachitvichyanukul.pdf?key1=42381&key2=4588540521&coll=GUIDE&dl=GUIDE&CFID=48844090&CFTOKEN=30501264">
* Binomial Random Variate Generation</a>
* <br>
* Time Complexity: O(Np)
* @param N
* @param p
* @return
*/
public static int binom_rnd(int N, double p) {
if(N <= 0 || p < 0 || p > 1)
throw new IllegalArgumentException("n must be a non-negative integer, while p must be between 0.0 and 1.0");
int x, y;
double c;
x = 0; y = 0; c = Math.log(1-p);
if(c == 0 || N == 0) { return x; }
boolean first_pass = true;
while( y < N ) {
if(!first_pass) { x++; }
first_pass = false;
double unif_sample = Math.random();
y += (int) Math.floor(Math.log(unif_sample)/c) + 1;
}
return x;
}//end of binom_rnd method
/**************************
** MULTINOMIAL SAMPLES **
**************************/
/**
* Creates <tt>data_size.length</tt> data sets, each of size <tt>data_size[t]</tt>
* following a multinomial distribution <tt>prob[t]</tt>.
*/
public static int[][] multinomial_rnd(int[] data_size, double[][] prob) {
int[][] samples = new int[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = multinomial_rnd(data_size[t], prob[t]);
return samples;
}//end of multinomial_rnd method
public static int[][] multinomial_rnd(int[] data_size, double[] prob) {
int[][] samples = new int[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = multinomial_rnd(data_size[t], prob);
return samples;
}//end of multinomial_rnd method
public static int[][] multinomial_rnd(int num_rows, int num_cols, double[] prob) {
int[][] samples = new int[num_rows][num_cols];
for(int t = 0; t < num_rows; t++)
samples[t] = multinomial_rnd(num_cols, prob);
return samples;
}//end of multinomial_rnd method
public static int[] multinomial_rnd(int num_cols, double[] prob) {
int[] samples = new int[num_cols];
for(int n = 0; n < num_cols; n++)
samples[n] = multinomial_rnd(prob);
return samples;
}//end of multinomial_rnd method
/**
*
* @param prob multinomial probability. Has to sum up to 1.
* @return
*/
public static int multinomial_rnd(double[] prob) {
StatUtil.normalize(prob);
double[] cumsum = new double[prob.length-1];
double sum = 0.0;
for(int i = 0; i < prob.length-1; i++) {
sum += prob[i];
cumsum[i] = sum;
}
double unif_sample = Math.random();
int i;
for(i = 0; i < cumsum.length; i++)
if(unif_sample < cumsum[i])
break;
return i;
}//end of multinomial_rnd method
/***************************
** MARKOV CHAIN SAMPLES **
**************************/
/**
* Creates samples from a Markov Chain for <tt>data_size.length</tt> data sets, each having
* <tt>data_size[t]</tt> samples and having a <tt>init_prob[t]</tt> initial probability
* and a transition matrix <tt>trans_mat[t]</tt>.
*/
public static int[][] mc_rnd(int[] data_size, double[][] init_prob, double[][][] trans_mat) {
int[][] samples = new int[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = mc_rnd(data_size[t], init_prob[t], trans_mat[t]);
return samples;
}//end of mc_rnd method
public static int[][] mc_rnd(int[] data_size, double[][] init_prob, double[][][][] trans_mat) {
int[][] samples = new int[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = mc_rnd(data_size[t], init_prob[t], trans_mat[t]);
return samples;
}//end of mc_rnd method
/**
* For inhomogeneous MCs
* @param num_rows
* @param num_cols
* @param init_prob
* @param trans_mat
* @return
*/
public static int[][] mc_rnd(int num_rows, int num_cols, double[] init_prob, double[][][] trans_mat) {
int[][] samples = new int[num_rows][];
for(int t = 0; t < samples.length; t++)
samples[t] = mc_rnd(num_cols, init_prob, trans_mat);
return samples;
}//end of mc_rnd method
/**
* Creates samples from a Markov Chain for <tt>num_rows</tt> data sets, each having
* <tt>num_cols</tt> samples and having a <tt>init_prob</tt> initial probability
* and a transition matrix <tt>trans_mat</tt>.
* @param num_rows
* @param num_cols
* @param init_prob
* @param trans_mat
* @return
*/
public static int[][] mc_rnd(int num_rows, int num_cols, double[] init_prob, double[][] trans_mat) {
int[][] samples = new int[num_rows][];
for(int t = 0; t < samples.length; t++)
samples[t] = mc_rnd(num_cols, init_prob, trans_mat);
return samples;
}//end of mc_rnd method
public static int[][] mc_rnd(int[] data_size, double[] init_prob, double[][] trans_mat_homog_part, double[][] trans_mat_inhomog_part) {
int[][] samples = new int[data_size.length][];
for(int t = 0; t < samples.length; t++)
samples[t] = mc_rnd(data_size[t], init_prob, trans_mat_homog_part, trans_mat_inhomog_part);
return samples;
}//end of mc_rnd method
public static int[][] mc_rnd(int num_rows, int num_cols, double[] init_prob, double[][] trans_mat_homog_part, double[][] trans_mat_inhomog_part) {
int[][] samples = new int[num_rows][];
for(int t = 0; t < samples.length; t++)
samples[t] = mc_rnd(num_cols, init_prob, trans_mat_homog_part, trans_mat_inhomog_part);
return samples;
}//end of mc_rnd method
/**
* Returns samples from an inhomogeneous Markov Chain. <br>
* @param num_cols
* @param init_prob
* @param trans_mat_homog_part the constant (homogeneous) part of the transition matrix
* @param trans_mat_inhomog_part the non-constant (inhomogeneous) part of the transition matrix (last row)
* @return
*/
public static int[] mc_rnd(int num_cols, double[] init_prob, double[][] trans_mat_homog_part, double[][] trans_mat_inhomog_part) {
int[] samples = new int[num_cols];
samples[0] = multinomial_rnd(init_prob);
for(int n = 1; n < samples.length; n++) {
//construct transition matrix
double[][] trans_mat = new double[trans_mat_homog_part.length+1][trans_mat_homog_part[0].length];
for(int k = 0; k < trans_mat.length-1; k++)
trans_mat[k] = trans_mat_homog_part[k];
trans_mat[trans_mat.length-1] = trans_mat_inhomog_part[n];
samples[n] = multinomial_rnd(trans_mat[samples[n-1]]);
}
return samples;
}//end of mc_rnd method
public static int[][] mc_rnd(int[] data_size, int[][] close_read_inds, double[] init_prob, double[][] trans_mat1_homog_part, double[][] trans_mat1_inhomog_part, double[][] trans_mat2) {
int[][] samples = new int[data_size.length][];
for(int t = 0; t < data_size.length; t++)
samples[t] = mc_rnd(data_size[t], close_read_inds[t], init_prob, trans_mat1_homog_part, trans_mat1_inhomog_part, trans_mat2);
return samples;
}//end of mc_rnd method
public static int[][] mc_rnd(int num_rows, int num_cols, int[][] close_read_inds, double[] init_prob, double[][] trans_mat1_homog_part, double[][] trans_mat1_inhomog_part, double[][] trans_mat2) {
int[][] samples = new int[num_rows][];
for(int t = 0; t < num_rows; t++)
samples[t] = mc_rnd(num_cols, close_read_inds[t], init_prob, trans_mat1_homog_part, trans_mat1_inhomog_part, trans_mat2);
return samples;
}//end of mc_rnd method
/**
*
* @param num_cols N
* @param close_read_inds 1xK (K<N) indices for reads that are within distance <tt>d</tt> from their previous read
* @param init_prob 1xM
* @param trans_mat1_homog_part (M-1)xM Homogeneous transition matrix part when adjacent reads are very close together (<=d)
* @param trans_mat1_inhomog_part KxM Inomogeneous transition matrix part when adjacent reads are very close together (<=d)
* @param trans_mat2 Homogeneous transition matrix when adjacent reads are far away (>d)
* @return
*/
public static int[] mc_rnd(int num_cols, int[] close_read_inds, double[] init_prob, double[][] trans_mat1_homog_part, double[][] trans_mat1_inhomog_part, double[][] trans_mat2) {
int[] samples = new int[num_cols];
samples[0] = multinomial_rnd(init_prob);
Arrays.sort(close_read_inds);
int n = 1;
for(int i = 0; i < close_read_inds.length; i++) {
// adjacent reads are far away
while(n < close_read_inds[i] && n < num_cols) {
samples[n] = multinomial_rnd(trans_mat2[samples[n-1]]);
n++;
}
if( n < num_cols ) {
// When n stops being less than close_read_inds[i], it becomes certainly
// equal to it. So, we take trans_mat1
//construct transition matrix 1 (for close adjacent reads)
double[][] trans_mat1 = new double[trans_mat1_homog_part.length+1][trans_mat1_homog_part[0].length];
for(int k = 0; k < trans_mat1.length-1; k++)
trans_mat1[k] = trans_mat1_homog_part[k];
trans_mat1[trans_mat1.length-1] = trans_mat1_inhomog_part[i];
samples[n] = multinomial_rnd(trans_mat1[samples[n-1]]);
n++;
}
else {
break;
}
}
while(n < num_cols) {
samples[n] = multinomial_rnd(trans_mat2[samples[n-1]]);
n++;
}
return samples;
}//end of mc_rnd method
/*
* Creates <tt>num_cols<tt> samples of an inhomogeneous Markov Chain, with an initial probability
* <tt>init_prob</tt> and a transition matrix <tt>trans_mat</tt>.
*/
public static int[] mc_rnd(int num_cols, double[] init_prob, double[][][] trans_mat) {
int[] samples = new int[num_cols];
samples[0] = multinomial_rnd(init_prob);
for(int n = 1; n < samples.length; n++) { samples[n] = multinomial_rnd(trans_mat[n][samples[n-1]]); }
return samples;
}//end of mc_rnd method
/*
* Creates <tt>num_cols<tt> samples of a Markov Chain, with an initial probability
* <tt>init_prob</tt> and a transition matrix <tt>trans_mat</tt>.
*/
public static int[] mc_rnd(int num_cols, double[] init_prob, double[][] trans_mat) {
int[] samples = new int[num_cols];
samples[0] = multinomial_rnd(init_prob);
for(int n = 1; n < samples.length; n++) { samples[n] = multinomial_rnd(trans_mat[samples[n-1]]); }
return samples;
}//end of mc_rnd method
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
/*
double[] prob = {.2, .5, .3};
int[] samples = Sampler.multinomial_sample(prob, 1000);
for(int i = 0; i < samples.length; i++)
System.out.print(samples[i]);
double count0 = 0;
double count1 = 0;
double count2 = 0;
for(int i = 0; i < samples.length; i++) {
if(samples[i] == 0) {count0++;}
else if(samples[i] == 1) {count1++;}
else {count2++;}
}
System.out.println();
System.out.printf("0: %.2f, 1: %.2f, 2: %.2f\n", count0/samples.length, count1/samples.length, count2/samples.length);
*/
/*
double[] init_prob = {.8, .2};
double[][] trans_mat = {{.6, .4},
{.3, .7}};
int num_rows = 1000; int num_cols = 1000;
int[][] samples = Sampler.mc_sample(num_rows, num_cols, init_prob, trans_mat);
double count_init_0 = 0;
double count00 = 0; double count01 = 0; double count10=0; double count11 = 0;
for(int t = 0; t < num_rows; t++) {
if(samples[t][0] == 0) { count_init_0++; }
for(int n = 1; n < num_cols; n++) {
if(samples[t][n-1] == 0) {
if(samples[t][n] == 0)
count00++;
else
count01++;
}
else {
if(samples[t][n] == 0)
count10++;
else
count11++;
}
}
}
double sum0 = count00+count01;
double sum1 = count10+count11;
System.out.printf("init_0: %.2f, init_1: %.2f\n\n", count_init_0/num_rows, (1-count_init_0/num_rows));
System.out.printf("a0->0: %.2f, a0->1: %.2f\n" +
"a1->0: %.2f, a1->1: %.2f\n", count00/sum0, count01/sum0, count10/sum1, count11/sum1);
*/
/*
double mu = 10.0;
double std = 4.0;
double[] samples = Sampler.gauss_rnd(20, mu, std);
*/
/*
double[] mu = {5, 50, 1000};
double[] std = {1, 10, 200};
int[] num_samples = {5, 10, 50};
//double[][] samples = Sampler.gauss_rnd(num_samples, mu, std);
double[][] prob = {
{.1, .9},
{.7, .3},
{.5, .5}
};
// int[][] sampler = Sampler.multinomial_rnd(num_samples, prob);
double[][] init_prob = {
{.2, .8},
{.5, .5},
{.35, .65}
};
double[][][] trans_mat = {
{{.8, .2},
{.6, .4}}
,
{{.6, .4},
{.5, .5}}
,
{{.1, .9},
{.2, .8}}
};
int[][] sampler = Sampler.mc_rnd(num_samples, init_prob, trans_mat);
*/
int[] unif_samples = unif_rnd(20, -3, 1);
int[] binom_samples = binom_rnd(1000000, 25, .99);
File f = new File("/Users/gio_fou/Desktop/binom_samples2.txt");
BufferedWriter bw = null;
try {
bw = new BufferedWriter(new FileWriter(f));
for(int n = 0; n < binom_samples.length; n++)
bw.write(binom_samples[n] + "\t");
}
catch (IOException e) { e.printStackTrace(); }
finally {
try {
bw.close();
}
catch (IOException e) { e.printStackTrace(); }
}
int num_cols = 20;
int[] close_read_inds = {2, 4, 6};
double[] init_prob = {.4, .5, .1};
double[][] trans_mat1_homog_part = {
{.4, .3, .3},
{.2, .2, .6}
};
double[][] trans_mat1_inhomog_part = {
{.3, .6, .1},
{.8, .1, .1},
{.2, .1, .8}
};
double[][] trans_mat2 = {
{.1, .7, .2},
{.1, .6, .3},
{.7, .2, .1}
};
int[] samples1 = mc_rnd(num_cols, close_read_inds, init_prob, trans_mat1_homog_part, trans_mat1_inhomog_part, trans_mat2);
System.out.println("Fin");
}//end of main method
}