package org.cellocad.MIT.dnacompiler;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
/**
* Useful static functions for histogram-based scoring.
*
*/
public class HistogramUtil {
/**
*
* Allows histogram scoring from discrete input RPU values
*
* Uses a representative cytometry distribution, and centers the distribution at the desired mean.
*
*/
public static ArrayList<Double> getDefaultHistgramAtSpecifiedMean(double log_mean, String file_name_default) {
ArrayList<Double> histogram = new ArrayList<Double>();
ArrayList<String> rpus = Util.fileLines(file_name_default);
double total_logrpu = 0.0;
for (int r = 0; r < rpus.size(); ++r) {
double rpu = Double.valueOf(rpus.get(r));
double logrpu = Math.log10(rpu);
total_logrpu += logrpu;
}
double avg_logrpu = total_logrpu / rpus.size();
for (int r = 0; r < rpus.size(); ++r) {
double rpu = Double.valueOf(rpus.get(r));
double logrpu = Math.log10(rpu) - avg_logrpu + log_mean; //histogram centered at input value
histogram.add(logrpu);
}
return histogram;
}
/**
*
* Interpolate cytometry data
*
*/
public static void interpolateTransferFunctionTitrations(String gate_name, GateLibrary gate_library) {
Gate g = gate_library.get_GATES_BY_NAME().get(gate_name);
HistogramBins hbins = g.get_histogram_bins();
String var = "";
if(g.get_variable_thresholds().keySet().size() == 1) {
ArrayList<String> vars = new ArrayList<String>(g.get_variable_thresholds().keySet());
var = vars.get(0);
}
else {
return;
}
HistogramXfer xfer_hist = g.get_xfer_hist();
ArrayList<double[]> xfer_interp = new ArrayList<double[]>();
ArrayList<double[]> xfer_normal = g.get_xfer_hist().get_xfer_binned();
for(int i=0; i<hbins.get_LOG_BIN_CENTERS().length; ++i) { //scanning x
double x = hbins.get_LOG_BIN_CENTERS()[i];
double rpu_x = Math.pow(10, x);
HashMap<String, Double> variables = new HashMap<String, Double>();
variables.put(var, rpu_x);
double rpu_y = ResponseFunction.computeOutput(variables, g.get_params(), g.get_equation());
double hill_mean = Math.log10(rpu_y);
int hill_mean_bin = 0;
for(int ii=0; ii<hbins.get_NBINS(); ++ii) {
hill_mean_bin = ii;
if(hbins.get_LOG_BIN_CENTERS()[ii] > hill_mean) {
break;
}
}
if(x < xfer_hist.get_xfer_titration().get(0)) {
double[] interp = new double[hbins.get_NBINS()];
for(int ii=0; ii<xfer_normal.get(0).length; ++ii) {
double z = xfer_normal.get(0)[ii];
interp[ii] = z;
}
int current_mean_bin = HistogramUtil.bin_median(interp);
int delta = - current_mean_bin + hill_mean_bin;
double[] hill_interp = new double[hbins.get_NBINS()];
for(int y=0; y<hbins.get_NBINS(); ++y) {
if(y - delta < 0 || y - delta >= hbins.get_NBINS())
hill_interp[y] = 0.0;
else {
hill_interp[y] = interp[y-delta];
}
}
double[] hill_normal = HistogramUtil.normalize(hill_interp);
xfer_interp.add(hill_normal);
}
else if(x > Math.log10(xfer_hist.get_xfer_titration().get( xfer_hist.get_xfer_titration().size()-1 ))) {
double[] interp = new double[hbins.get_NBINS()];
for(int ii=0; ii<xfer_normal.get(xfer_normal.size()-1 ).length; ++ii) {
double z = xfer_normal.get(xfer_normal.size()-1)[ii];
interp[ii] = z;
}
int current_mean_bin = HistogramUtil.bin_median(interp);
int delta = - current_mean_bin + hill_mean_bin;
double[] hill_interp = new double[hbins.get_NBINS()];
for(int y=0; y<hbins.get_NBINS(); ++y) {
if(y - delta < 0 || y - delta >= hbins.get_NBINS())
hill_interp[y] = 0.0;
else {
hill_interp[y] = interp[y-delta];
}
}
double[] hill_normal = HistogramUtil.normalize(hill_interp);
xfer_interp.add(hill_normal);
}
else {
int indx_low = 0;
int indx_high = 11;
for(int j=1; j<xfer_hist.get_xfer_titration().size(); ++j) {
if(Math.log10(xfer_hist.get_xfer_titration().get(j)) > x) {
indx_high = j;
break;
}
}
indx_low = indx_high - 1;
//x
//y is bin
//z is counts
double x1 = Math.log10(xfer_hist.get_xfer_titration().get(indx_low));
double x2 = Math.log10(xfer_hist.get_xfer_titration().get(indx_high));
double weight = (x - x1) / (x2 - x1);
int median_bin_y1 = HistogramUtil.bin_median(xfer_normal.get(indx_low));
int median_bin_y2 = HistogramUtil.bin_median(xfer_normal.get(indx_high));
Double med_bin_interp = median_bin_y1 * (1 - weight) + median_bin_y2 * weight;
Integer median_bin_interp = med_bin_interp.intValue();
/////////////////////////////////////////////////
median_bin_interp = hill_mean_bin;
/////////////////////////////////////////////////
double[] interp = new double[hbins.get_NBINS()];
for(int y=0; y<hbins.get_LOG_BIN_CENTERS().length; ++y) { //scanning x
int delta = median_bin_interp - y;
int y1 = median_bin_y1 - delta;
int y2 = median_bin_y2 - delta;
double z1 = 0.0; //counts, low boundary
double z2 = 0.0; //counts, high boundary
if(y1 >= 0 && y1 < hbins.get_NBINS()) {
z1 = xfer_normal.get(indx_low)[y1];
}
if(y2 >= 0 && y2 < hbins.get_NBINS()) {
z2 = xfer_normal.get(indx_high)[y2];
}
double z = z1 * (1 - weight) + z2 * weight;
interp[y] = z;
//double weighted_avg = y1 * (1 - weight) + y2 * weight;
//interp[ii] = weighted_avg;
}
double[] hill_normal = HistogramUtil.normalize(interp);
xfer_interp.add( hill_normal );
}
}
String all = "";
for(int i=0; i<hbins.get_NBINS(); ++i) { //print 500x500 matrix
String row = "";
for(int j=0; j<xfer_interp.size(); ++j) {
row += xfer_interp.get(j)[i] + " ";
}
all += row + "\n";
}
//write square matrix to text file
//String outname = "matrix_xfer_" + g.Name + ".txt";
//Util.fileWriter(Args.output_directory + outname, all, false);
xfer_hist.set_xfer_interp(xfer_interp);
g.set_xfer_hist(xfer_hist);
//make sure each titration sums to 1.000
/*for (int i = 0; i < hbins.get_NBINS(); ++i) {
double sum = 0.0;
for(int ii=0; ii<hbins.get_NBINS(); ++ii) {
sum += xfer_interp.get(i)[ii];
}
Print.message(2, "interp sum " + sum);
}*/
}
public static void getTransferFunctionHistogramTitrations(String gate_name, GateLibrary gate_library, HistogramBins hbins, String filepath) {
//Print.message(2, "getting transfer function histogram titrations for " + gate_name);
Gate g = gate_library.get_GATES_BY_NAME().get(gate_name);
HistogramXfer xfer_hist = g.get_xfer_hist();
//read raw data
ArrayList<ArrayList<Double>> xfer_data_raw = new ArrayList< ArrayList<Double> >();
for(int i=1; i<=12; ++i) {
xfer_data_raw.add( new ArrayList<Double>() );
String file_name = filepath + g.Name + "/rpus_" + g.Name + "_" + String.format("%02d", i) + ".txt";
//System.out.println("ls " + file_name);
File f = new File(file_name);
if(!f.exists()) {
System.out.println("cannot find file " + file_name);
System.exit(-1);
}
ArrayList<String> rpu_lines = Util.fileLines(file_name);
for (String r : rpu_lines) {
double logrpu = Math.log10(Double.valueOf(r));
xfer_data_raw.get(i - 1).add(logrpu);
}
}
xfer_hist.set_xfer_data_raw( xfer_data_raw );
//bin
ArrayList<double[]> xfer_binned = new ArrayList<double[]>();
for(int i=0; i<12; ++i) {
double[] binned = HistogramUtil.placeDataIntoBins(xfer_hist.get_xfer_data_raw().get(i), hbins);
xfer_binned.add(binned);
}
xfer_hist.set_xfer_binned( xfer_binned );
}
/**
* Not in use.
*
* generic function to perform binning to generate a histogram from a list of numbers.
* Instead, see placeDataIntoBins.
*
*/
public static double[] calcHistogram(ArrayList<Double> data, double min, double max, int numBins, boolean logrpu) {
final double[] result = new double[numBins];
final double binSize = (max - min)/numBins;
for (double d : data) {
if(logrpu) {
d = Math.log10(d);
}
int bin = (int) ((d - min) / binSize);
if (bin < 0) { /* this data is smaller than min */ }
else if (bin >= numBins) { /* this data point is bigger than max */ }
else {
result[bin] += 1.0;
}
}
return result;
}
/**
* normalize histogram, sum of all fractional counts = 1.
*
*/
public static double[] normalize(double[] data) {
double[] norm = new double[ data.length ];
double total_sum = 0.0;
for(int i=0; i<data.length; ++i) {
total_sum += data[i];
}
for(int i=0; i<data.length; ++i) {
norm[i] = data[i]/total_sum;
}
return norm;
}
//returns RPU, not log(RPU)
public static double median(ArrayList<Double> m) {
Collections.sort(m);
int middle = m.size()/2;
if (m.size()%2 == 1)
return m.get(middle);
else
return (m.get(middle-1) + m.get(middle)) / 2.0;
}
//returns RPU, not log(RPU)
public static double median(double[] data, HistogramBins hbins) {
double total_sum = 0.0;
for(int i=0; i<data.length; ++i) {
total_sum += data[i];
}
double halfway = total_sum / 2;
double current = 0.0;
int bin = 0;
for(int i=0; i<data.length; ++i) {
current += data[i];
if(current >= halfway) {
break;
}
++bin;
}
return Math.pow(10, hbins.get_LOG_BIN_CENTERS()[bin]);
}
/**
*
* returns bin index of median
*
*/
public static int bin_median(double[] data) {
double total_sum = 0.0;
for(int i=0; i<data.length; ++i) {
total_sum += data[i];
}
double halfway = total_sum / 2;
double current = 0.0;
int bin = 0;
for(int i=0; i<data.length; ++i) {
current += data[i];
if(current >= halfway) {
break;
}
++bin;
}
return bin;
}
/**
*
* Given an RPU, find the bin index of that RPU in the histogram
*
*/
public static int bin_of_logrpu(double logrpu, HistogramBins hbins) {
for(int i=0; i<hbins.get_LOG_BIN_CENTERS().length; ++i) {
if(hbins.get_LOG_BIN_CENTERS()[i] >= logrpu) {
return i;
}
}
return hbins.get_NBINS() - 1;
}
/**
*
* convert list of numbers to histogram
*
*/
public static double[] placeDataIntoBins(ArrayList<Double> data, HistogramBins hbins) {
final double[] result = new double[hbins.get_NBINS()];
final double binSize = (hbins.get_LOGMAX() - hbins.get_LOGMIN())/hbins.get_NBINS();
for (double d : data) {
int bin = (int) ((d - hbins.get_LOGMIN()) / binSize);
if (bin < 0) { /* this data is smaller than min */ }
else if (bin >= hbins.get_NBINS()) { /* this data point is bigger than max */ }
else {
result[bin] += 1.0;
}
}
return result;
}
/**
*
* translate a histogram left or right to the specified median
*
* compute the bin distance (# bins) separating the current median and new median
* shift fractional counts by that # of bins
* renormalize (in case tails are chopped off)
*
*/
public static double[] normalizeHistogramToNewMedian(double[] histogram, Double new_median, HistogramBins hbins) {
int bin_median = HistogramUtil.bin_median(histogram);
int shift = bin_median - HistogramUtil.bin_of_logrpu(Math.log10(new_median), hbins);
double[] shifted_histogram = new double[hbins.get_NBINS()];
for(int bin=0; bin<hbins.get_NBINS(); ++bin) {
int shifted_bin = bin - shift;
if(shifted_bin > 0 && shifted_bin < hbins.get_NBINS()) {
shifted_histogram[shifted_bin] = histogram[bin];
}
}
HistogramUtil.normalize(shifted_histogram);
return shifted_histogram;
}
};