package com.ppfold.algo;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import com.ppfold.algo.extradata.ExtraData;
import com.ppfold.algo.extradata.ForcedConstraints;
/**
* After reading and parsing input, algorithm execution starts here. (Note:
* Columns with many gaps are already removed)
*
* @author Z.Sukosd
*/
public class FoldingProject {
static final double LOG_TWO = Math.log(2d);
/**
* Makes sure that all leaves of the tree can be found, and creates a list
* of leaves for easy access.
*/
public static boolean checkInput(Tree tree, List<char[]> columns,
List<String> names) {
boolean treematchesnames = true;
boolean columnshaverightlengths = true;
boolean noillegalcharacters = true;
int firstsize = columns.get(0).length;
for (int col = 0; col < columns.size(); col++) {
if (columns.get(col).length != firstsize) {
columnshaverightlengths = false;
System.err
.println("Columns in alignment don't have same length!");
}
for (int row = 0; row < columns.get(col).length; row++) {
char thischar = columns.get(col)[row];
thischar = Character.toLowerCase(thischar);
if (thischar != 'a' && thischar != 'u' && thischar != 't'
&& thischar != 'g' && thischar != 'c'
&& thischar != 'r' && thischar != 'y'
&& thischar != 's' && thischar != 'w'
&& thischar != 'k' && thischar != 'm'
&& thischar != 'b' && thischar != 'd'
&& thischar != 'h' && thischar != 'v'
&& thischar != 'n'
&& !MatrixTools.isGap(thischar)) {
System.err.println("Illegal character! " + thischar);
noillegalcharacters = false;
}
}
}
for (int row = 0; row < columns.get(0).length; row++) {
// finds the node corresponding to the rownumber
// rownumber = corresponds to sequences.
Node node = tree.findSlowlyNodeWithName(names.get(row));
if (node == null) {
System.err.println("Can't find node with name "
+ names.get(row) + "!");
treematchesnames = false;
}
}
return treematchesnames && columnshaverightlengths
&& noillegalcharacters;
}
// public static ResultBundle fold(int phylojobsnr, int scfgjobsnr, Tree tree,
// List<char[]> columns, List<String> names, Parameters param,
// AsynchronousJobExecutor executor, boolean verbose, int execnr,
// List <ExtraData> extradata_list, boolean diffbp, boolean entropycalc) {
// try {
// return fold(NullProgress.INSTANCE, phylojobsnr, scfgjobsnr, tree,
// columns, names, param, executor, verbose, execnr, extradata_list, diffbp, entropycalc);
// } catch (InterruptedException e) {
// // Never happens - we use the NullActivity
// e.printStackTrace();
// return null;
// }
// }
public static ResultBundle fold(Progress act, int phylojobsnr,
int scfgjobsnr, Tree tree, List<char[]> columns,
List<String> names, Parameters param,
AsynchronousJobExecutor executor, boolean verbose, int execnr,
List <ExtraData> extradata_list, boolean diffbp, boolean entropycalc)
throws InterruptedException {
if(columns.size()<2){
return ResultBundle.tinyBundle();
}
double scfg_to_phylo_ratio = 0.095 * columns.size()
/ columns.get(0).length;
double phylopart = 0.95 / (scfg_to_phylo_ratio + 1);
if (phylopart < 0 | phylopart > 1) {
System.err.println("Time estimation resulted "
+ "in illegal value for phylogenetic calculations: "
+ phylopart);
phylopart = 0.475; // in case the estimation results in a
// "weird number", just set it to 50-50%.
}
double scfgpart = 0.95 - phylopart; // max is 95% because processing
// must happen before and after
act.setCurrentActivity("Applying evolutionary model");
//long starttime = System.currentTimeMillis();
double[][] probmatrix = FoldingProject.createPhyloProb(act
.getChildProgress(phylopart), phylojobsnr, tree, columns,
names, columns.size(), param, executor, verbose, execnr);
//If there are extra data, multiply probmatrix with the relevant values
double[][] probmatrix2 = null;
if(diffbp&&extradata_list!=null){
probmatrix2= new double[probmatrix.length][probmatrix[0].length];
for (int a = 0; a<probmatrix2.length;a++){
for(int b = 0;b<probmatrix2[0].length; b++){
probmatrix2[a][b] = probmatrix[a][b];
}
}
}
if(extradata_list!=null){
System.out.println("Number of auxiliary data items: " + extradata_list.size());
ArrayList<ForcedConstraints> fcList = new ArrayList<ForcedConstraints>();
for(ExtraData data:extradata_list){
if(data instanceof ForcedConstraints){
//If forcing constraints, collect them into one and remove from the list
fcList.add((ForcedConstraints) data);
}
}
for(ExtraData data:fcList){
extradata_list.remove(data);
}
if(fcList.size()>0){
System.out.println("Processing " + fcList.size () + " hard constraint datasets");
ForcedConstraints fcMerged = new ForcedConstraints();
fcMerged = fcMerged.combinedForcedConstraints(probmatrix.length, fcList);
extradata_list.add(fcMerged);
}
}
System.out.println("Processing all auxiliary data");
if(extradata_list!=null){
for(ExtraData extradata:extradata_list){
for(int i = 0; i<probmatrix.length; i++){
for(int j = 0; j<i; j++){
probmatrix[i][j] *= extradata.getProbabilityGivenInnerPaired(i,j);
probmatrix[j][i] = probmatrix[i][j];
}
probmatrix[i][i] *= extradata.getProbabilityGivenUnpaired(i);
}
}
}
if(diffbp&&extradata_list!=null){
//If inner and outer basepairs are distinguished
for(ExtraData extradata:extradata_list){
for(int i = 0; i<probmatrix2.length; i++){
for(int j = 0; j<i; j++){
probmatrix2[i][j] *= extradata.getProbabilityGivenOuterPaired(i,j);
probmatrix2[j][i] = probmatrix2[i][j];
}
probmatrix2[i][i] *= extradata.getProbabilityGivenUnpaired(i);
}
}
}
//System.out.println("Time in phylogenetic part: " + (System.currentTimeMillis()-starttime));
// do not allow basepairs less than 4 nucleotides apart.
for (int i = 0; i < probmatrix.length; i++) {
for (int j = 1; j < 4; j++) {
if (i + j < probmatrix.length) {
probmatrix[i][i + j] = 0;
}
if (i - j > 0) {
probmatrix[i][i - j] = 0;
}
}
}
if(diffbp&&extradata_list!=null){
for (int i = 0; i < probmatrix2.length; i++) {
for (int j = 1; j < 4; j++) {
if (i + j < probmatrix2.length) {
probmatrix2[i][i + j] = 0;
}
if (i - j > 0) {
probmatrix2[i][i - j] = 0;
}
}
}
}
System.out.println("Folding...");
act.setCurrentActivity("Applying grammar");
//starttime = System.currentTimeMillis();
ResultBundle result = FoldingProject.calcSCFG(act
.getChildProgress(scfgpart), scfgjobsnr, param.getProb(),
probmatrix, probmatrix2, executor, verbose, diffbp, entropycalc);
//System.out.println("Time in SCFG part: " + (System.currentTimeMillis()-starttime));
result.phyloProbs = probmatrix;
//RNAFoldingTools.writeMatrix(probmatrix, new File("probmatrix.txt"));
//Shut down the executor so we aren't hanging at the end
executor.shutDown();
return result;
}
public static ResultBundle foldMatrix(Progress act, int phylojobsnr,
int scfgjobsnr, List<char[]> columns,
List<String> names, double [][] inputMatrix, Parameters param,
AsynchronousJobExecutor executor, boolean verbose, int execnr,
List <ExtraData> extradata_list, boolean diffbp, boolean entropycalc)
throws InterruptedException {
if(columns.size()<2){
return ResultBundle.tinyBundle();
}
double scfg_to_phylo_ratio = 0.095 * columns.size()
/ columns.get(0).length;
double phylopart = 0.95 / (scfg_to_phylo_ratio + 1);
if (phylopart < 0 | phylopart > 1) {
System.err.println("Time estimation resulted "
+ "in illegal value for phylogenetic calculations: "
+ phylopart);
phylopart = 0.475; // in case the estimation results in a
// "weird number", just set it to 50-50%.
}
double scfgpart = 0.95 - phylopart; // max is 95% because processing
// must happen before and after
act.setCurrentActivity("Applying evolutionary model");
long starttime = System.currentTimeMillis();
/*double[][] probmatrix = FoldingProject.createPhyloProb(act
.getChildProgress(phylopart), phylojobsnr, tree, columns,
names, columns.size(), param, executor, verbose, execnr);*/
//If there are extra data, multiply probmatrix with the relevant values
double [][] probmatrix = new double[inputMatrix.length][inputMatrix[0].length];
for(int i = 0 ; i < probmatrix.length ; i++)
{
double sum = 0;
for(int j = 0 ; j < probmatrix[0].length ; j++)
{
probmatrix[i][j] = inputMatrix[i][j];
sum += probmatrix[i][j];
}
//probmatrix[i][i] = 1 - sum;
//System.out.println("XVX"+sum);
}
double[][] probmatrix2 = null;
if(diffbp&&extradata_list!=null){
probmatrix2= new double[probmatrix.length][probmatrix[0].length];
for (int a = 0; a<probmatrix2.length;a++){
for(int b = 0;b<probmatrix2[0].length; b++){
probmatrix2[a][b] = probmatrix[a][b];
}
}
}
if(extradata_list!=null){
System.out.println("Number of auxiliary data items: " + extradata_list.size());
ArrayList<ForcedConstraints> fcList = new ArrayList<ForcedConstraints>();
for(ExtraData data:extradata_list){
if(data instanceof ForcedConstraints){
//If forcing constraints, collect them into one and remove from the list
fcList.add((ForcedConstraints) data);
}
}
for(ExtraData data:fcList){
extradata_list.remove(data);
}
if(fcList.size()>0){
System.out.println("Processing " + fcList.size () + " hard constraint datasets");
ForcedConstraints fcMerged = new ForcedConstraints();
fcMerged = fcMerged.combinedForcedConstraints(probmatrix.length, fcList);
extradata_list.add(fcMerged);
}
}
System.out.println("Processing all auxiliary data");
if(extradata_list!=null){
for(ExtraData extradata:extradata_list){
for(int i = 0; i<probmatrix.length; i++){
for(int j = 0; j<i; j++){
probmatrix[i][j] *= extradata.getProbabilityGivenInnerPaired(i,j);
probmatrix[j][i] = probmatrix[i][j];
}
probmatrix[i][i] *= extradata.getProbabilityGivenUnpaired(i);
}
}
}
if(diffbp&&extradata_list!=null){
//If inner and outer basepairs are distinguished
for(ExtraData extradata:extradata_list){
for(int i = 0; i<probmatrix2.length; i++){
for(int j = 0; j<i; j++){
probmatrix2[i][j] *= extradata.getProbabilityGivenOuterPaired(i,j);
probmatrix2[j][i] = probmatrix2[i][j];
}
probmatrix2[i][i] *= extradata.getProbabilityGivenUnpaired(i);
}
}
}
//System.out.println("Time in phylogenetic part: " + (System.currentTimeMillis()-starttime));
// do not allow basepairs less than 4 nucleotides apart.
for (int i = 0; i < probmatrix.length; i++) {
for (int j = 1; j < 4; j++) {
if (i + j < probmatrix.length) {
probmatrix[i][i + j] = 0;
}
if (i - j > 0) {
probmatrix[i][i - j] = 0;
}
}
}
if(diffbp&&extradata_list!=null){
for (int i = 0; i < probmatrix2.length; i++) {
for (int j = 1; j < 4; j++) {
if (i + j < probmatrix2.length) {
probmatrix2[i][i + j] = 0;
}
if (i - j > 0) {
probmatrix2[i][i - j] = 0;
}
}
}
}
System.out.println("Folding...");
act.setCurrentActivity("Applying grammar");
//starttime = System.currentTimeMillis();
ResultBundle result = FoldingProject.calcSCFG(act
.getChildProgress(scfgpart), scfgjobsnr, param.getProb(),
probmatrix, probmatrix2, executor, verbose, diffbp, entropycalc);
//System.out.println("Time in SCFG part: " + (System.currentTimeMillis()-starttime));
//Shut down the executor so we aren't hanging at the end
executor.shutDown();
return result;
}
public static ResultBundle foldFuzzyAlignment(Progress act, int phylojobsnr,
int scfgjobsnr, Tree tree, List<FuzzyNucleotide[]> columns, FuzzyAlignment fuzzyAlignment,
List<String> names, Parameters param,
AsynchronousJobExecutor executor, boolean verbose, int execnr,
List <ExtraData> extradata_list, boolean diffbp, boolean entropycalc)
throws InterruptedException {
if(columns.size()<2){
return ResultBundle.tinyBundle();
}
double scfg_to_phylo_ratio = 0.095 * columns.size()
/ columns.get(0).length;
double phylopart = 0.95 / (scfg_to_phylo_ratio + 1);
if (phylopart < 0 | phylopart > 1) {
System.err.println("Time estimation resulted "
+ "in illegal value for phylogenetic calculations: "
+ phylopart);
phylopart = 0.475; // in case the estimation results in a
// "weird number", just set it to 50-50%.
}
double scfgpart = 0.95 - phylopart; // max is 95% because processing
// must happen before and after
act.setCurrentActivity("Applying evolutionary model");
//long starttime = System.currentTimeMillis();
double[][] probmatrix = FoldingProject.createPhyloProbForFuzzyAlignment(act
.getChildProgress(phylopart), phylojobsnr, tree, columns, fuzzyAlignment,
names, columns.size(), param, executor, verbose, execnr);
//If there are extra data, multiply probmatrix with the relevant values
double[][] probmatrix2 = null;
if(diffbp&&extradata_list!=null){
probmatrix2= new double[probmatrix.length][probmatrix[0].length];
for (int a = 0; a<probmatrix2.length;a++){
for(int b = 0;b<probmatrix2[0].length; b++){
probmatrix2[a][b] = probmatrix[a][b];
}
}
}
if(extradata_list!=null){
System.out.println("Number of auxiliary data items: " + extradata_list.size());
ArrayList<ForcedConstraints> fcList = new ArrayList<ForcedConstraints>();
for(ExtraData data:extradata_list){
if(data instanceof ForcedConstraints){
//If forcing constraints, collect them into one and remove from the list
fcList.add((ForcedConstraints) data);
}
}
for(ExtraData data:fcList){
extradata_list.remove(data);
}
if(fcList.size()>0){
System.out.println("Processing " + fcList.size () + " hard constraint datasets");
ForcedConstraints fcMerged = new ForcedConstraints();
fcMerged = fcMerged.combinedForcedConstraints(probmatrix.length, fcList);
extradata_list.add(fcMerged);
}
}
System.out.println("Processing all auxiliary data");
if(extradata_list!=null){
for(ExtraData extradata:extradata_list){
for(int i = 0; i<probmatrix.length; i++){
for(int j = 0; j<i; j++){
probmatrix[i][j] *= extradata.getProbabilityGivenInnerPaired(i,j);
probmatrix[j][i] = probmatrix[i][j];
}
probmatrix[i][i] *= extradata.getProbabilityGivenUnpaired(i);
}
}
}
if(diffbp&&extradata_list!=null){
//If inner and outer basepairs are distinguished
for(ExtraData extradata:extradata_list){
for(int i = 0; i<probmatrix2.length; i++){
for(int j = 0; j<i; j++){
probmatrix2[i][j] *= extradata.getProbabilityGivenOuterPaired(i,j);
probmatrix2[j][i] = probmatrix2[i][j];
}
probmatrix2[i][i] *= extradata.getProbabilityGivenUnpaired(i);
}
}
}
//System.out.println("Time in phylogenetic part: " + (System.currentTimeMillis()-starttime));
// do not allow basepairs less than 4 nucleotides apart.
for (int i = 0; i < probmatrix.length; i++) {
for (int j = 1; j < 4; j++) {
if (i + j < probmatrix.length) {
probmatrix[i][i + j] = 0;
}
if (i - j > 0) {
probmatrix[i][i - j] = 0;
}
}
}
if(diffbp&&extradata_list!=null){
for (int i = 0; i < probmatrix2.length; i++) {
for (int j = 1; j < 4; j++) {
if (i + j < probmatrix2.length) {
probmatrix2[i][i + j] = 0;
}
if (i - j > 0) {
probmatrix2[i][i - j] = 0;
}
}
}
}
System.out.println("Folding...");
act.setCurrentActivity("Applying grammar");
//starttime = System.currentTimeMillis();
ResultBundle result = FoldingProject.calcSCFG(act
.getChildProgress(scfgpart), scfgjobsnr, param.getProb(),
probmatrix, probmatrix2, executor, verbose, diffbp, entropycalc);
//System.out.println("Time in SCFG part: " + (System.currentTimeMillis()-starttime));
//Shut down the executor so we aren't hanging at the end
executor.shutDown();
return result;
}
private static double[][] createPhyloProbForFuzzyAlignment(Progress act, int userjobsnr,
Tree tree, List<FuzzyNucleotide[]> columns_char, FuzzyAlignment fuzzyAlignment, List<String> names,
final int length, Parameters param,
AsynchronousJobExecutor executor, final boolean verbose, int execnr)
throws InterruptedException {
final long starttime = System.nanoTime();
if(verbose){
System.out.println("Timer (phylogeny) started. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
}
if (verbose) {
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out.println("Processing input...");
}
if (verbose) {
act.setCurrentActivity("Evolutionary model: processing input");
}
List<FuzzyNucleotide[]> columns = new ArrayList<FuzzyNucleotide[]>();
/*
for (int i = 0; i < columns_char.size(); i++) {
columns.add(MatrixTools.convertColumn(columns_char.get(i)));
}*/
for (int i = 0; i < columns_char.size(); i++) {
columns.add(columns_char.get(i));
}
if (verbose) {
System.out.println("User wish for number of divisions: " + userjobsnr);
}
// correct user input
if (userjobsnr > length) {
userjobsnr = length;
} else if (userjobsnr < 1) {
userjobsnr = 1;
}
int nrjobs = userjobsnr;
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out
.println("Actual nr. of divisions in phylogenetic calculations: "
+ nrjobs);
}
if (verbose) {
System.out.println("Generating matrices for each node...");
}
tree.generateLeafList(names); // to speed up calculations later
final double[][] probmatrix = new double[length][length];
List<PhyloJobFuzzy> jobs = new ArrayList<PhyloJobFuzzy>();
// First generate "exp(Rt)" matrix for each node.
// This is the same for all columns of that node.
tree.getRoot().calculateChildrenMatrix(param.getSD(), param.getSV(),
param.getSV1());
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out.println("Generating jobs...");
}
if (verbose) {
act.setCurrentActivity("Evolutionary model: dividing tasks");
}
// Create jobs for SINGLE columns
// now all single columns are in one job
// Add the only single-column job
int colcnt = 0;
PhyloJobFuzzy lastjob = new PhyloJobFuzzy(fuzzyAlignment);
lastjob.tree = Tree.copyTree(tree); // must copy the entire tree into
// each phylojob
lastjob.names = names; // must copy the names into each phylojob (for
// debugging only)
lastjob.startcol = 0;
lastjob.jobid = 0;
lastjob.endcol = length - 1;
lastjob.type = false;
lastjob.param = param;
for (int col = 0; col < length; col++) {
// send columns
lastjob.columns.add(columns.get(col));
lastjob.columnIndices.add(col);
colcnt++;
}
jobs.add(lastjob);
// Create jobs for column PAIRS
// count total number of column pairs
int paircnt = 0;
for (int i = 0; i < length; i++) {
for (int j = i + 1; j < length; j++) {
paircnt++;
}
}
if(verbose){
System.out.println("Total number of pairs: " + paircnt);
System.out.println("Pairs in a job: " + (paircnt) / nrjobs);
}
// have to calculate matrices, but only once
tree.getRoot().calculateChildrenMatrix(param.getDD(), param.getDV(),
param.getDV1());
int lastendcol = -1;
int paircolcnt = 0;
// add all jobs except last
for (int jobnr = 0; jobnr < nrjobs - 1; jobnr++) {
PhyloJobFuzzy job = new PhyloJobFuzzy(fuzzyAlignment);
job.tree = Tree.copyTree(tree); // must copy the entire tree into
// each phylojob
job.names = names; // must copy the names into each phylojob
job.type = true;
job.param = param;
job.jobid = jobnr + 1;
int col1 = lastendcol + 1;
job.startcol = col1;
paircolcnt += length - col1;
col1--;
while (paircolcnt < ((long)(jobnr + 1) * (paircnt)) / nrjobs) {
col1++;
if (col1 == length) { // no more columns left, finish
col1--;
break;
}
job.columns.add(columns.get(col1));
job.columnIndices.add(col1);
paircolcnt += length - col1;
}
job.endcol = col1;
lastendcol = col1;
for (int col2 = job.startcol + 1; col2 < length; col2++) {
// send pairing columns
job.columns2.add(columns.get(col2));
job.columnIndices2.add(col2);
}
jobs.add(job);
paircolcnt -= length - col1;
}
// Add the last job
lastjob = new PhyloJobFuzzy(fuzzyAlignment);
lastjob.tree = Tree.copyTree(tree); // must copy the entire tree into
// each phylojob
lastjob.names = names; // must copy the names into each phylojob
lastjob.startcol = lastendcol + 1;
lastjob.endcol = length;
lastjob.type = true;
lastjob.param = param;
lastjob.jobid = nrjobs;
for (int col1 = lastjob.startcol; col1 < length; col1++) {
// send columns
lastjob.columns.add(columns.get(col1));
lastjob.columnIndices.add(col1);
}
for (int col2 = lastjob.startcol + 1; col2 < length; col2++) {
// send pairing columns
lastjob.columns2.add(columns.get(col2));
lastjob.columnIndices2.add(col2);
}
jobs.add(lastjob);
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out
.println("Total number of jobs in phylogenetic calculations: "
+ jobs.size());
}
if (verbose) {
System.out.println("Executing jobs...");
}
// start executing the jobs
// counts how many phylojobs are done
final AtomicInteger finishedphylojobscount = new AtomicInteger(0);
act.setCurrentActivity("Evolutionary model: " +
"calculating column probabilities");
long gridstarttime = System.nanoTime();
// execute single column jobs
Progress singleColAct = act.getChildProgress(0.1);
for (int jobnr = 0; jobnr < 1; jobnr++) {
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
PhyloJobFuzzy job = jobs.get(jobnr);
//final int jn = jobnr;
final int startcol = job.startcol;
final Progress jobAct = singleColAct
.getChildProgress((float) job.columns.size()
/ (float) colcnt);
executor.startExecution(job, new JobListener() {
@Override
public void jobFinished(JobResults result) {
} // doesn't happen here
@Override
public void jobFinished(List<ResultBundle> result) {
} // doesn't happen here
@Override
public void jobFinished(double[][] result) {
for (int col = 0; col < result.length; col++) {
probmatrix[col + startcol][col + startcol] = result[col][0];
}
finishedphylojobscount.incrementAndGet();
jobAct.setProgress(1.0);
}
});
}
Progress doubleColAct = act.getChildProgress(0.9);
// execute double column jobs
for (int jobnr = 1; jobnr < jobs.size(); jobnr++) {
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
PhyloJobFuzzy job = jobs.get(jobnr);
//final int jn = jobnr;
final int startcol = job.startcol;
final int endcol = job.endcol;
final Progress jobAct = doubleColAct
.getChildProgress((float) job.columns.size()
/ (float) length);
if (job.columns2.size() != 0) {
executor.startExecution(job, new JobListener() {
@Override
public void jobFinished(JobResults result) {
} // doesn't happen here
@Override
public void jobFinished(List<ResultBundle> result) {
} // doesn't happen here
@Override
public void jobFinished(double[][] result) {
for (int col1 = startcol; col1 < endcol + 1; col1++) {
for (int col2 = col1 + 1; col2 < length; col2++) {
probmatrix[col1][col2] = result[col1 - startcol][col2
- startcol - 1];
probmatrix[col2][col1] = probmatrix[col1][col2]; // make
// it
// symmetric
}
}
finishedphylojobscount.incrementAndGet();
jobAct.setProgress(1.0);
}
});
} else {
finishedphylojobscount.incrementAndGet();
jobAct.setProgress(1.0);
}
}
// wait for last thread to finish
// wait for jobs to finish
while (finishedphylojobscount.get() < jobs.size()) {
Thread.sleep(100);
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
}
singleColAct.setProgress(1.0);
doubleColAct.setProgress(1.0);
act.setProgress(1.0);
//act.setCurrentActivity("Evolutionary model: done");
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
System.out.println("TOTAL TIME ELAPSED IN PHYLOGENETIC PART: "
+ (int)((System.nanoTime() - starttime) * 1e-9) + " seconds ");
if(verbose){
System.out.println(" ...of which distributed: "
+ (System.nanoTime() - gridstarttime) * 1e-9 + " seconds");
}
//System.out.println();
// Result contains the a priori probability distribution matrix.
return probmatrix;
}
private static ResultBundle calcSCFG(Progress act, int userjobsnr,
double[][] prob, double[][] probmatrix, double[][] probmatrix2,
AsynchronousJobExecutor executor, final boolean verbose, final boolean diffbp, final boolean entropycalc)
throws InterruptedException {
PointRes tmp = new PointRes(0, 0);
final long starttime = System.nanoTime();
double entropyVal = 0;
double entropyPercOfMax = 0;
double entropyMax = 0;
if (verbose) {
System.out.println("Timer (SCFG) started. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
System.out.println("Processing input...");
}
act.setCurrentActivity("Applying grammar: processing input");
int length = probmatrix.length;
// First corrections of user input
if (userjobsnr > length) {
userjobsnr = length - 1;
} else if (userjobsnr < 1) {
userjobsnr = 1;
} else {
}
// Now calculate number of jobs
int distance = length / userjobsnr + 1; // the +1 is to prevent
// generation of superfluous
// jobs
int firstrowspaces = length + 1;
int isthereextra = (firstrowspaces % distance != 0) ? 1 : 0;
// triangle is compelete iff isthereextra = 0.
int nrdivisions = ((firstrowspaces - firstrowspaces % distance) / distance)
+ isthereextra;
// nrdivisions = how many jobs should there be in 1st row?
int nrsectors = nrdivisions * (nrdivisions + 1) / 2;
// total number of jobs in full triangle
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out.println("User wish for number of divisions: "
+ userjobsnr);
System.out
.println("Number of sectors in inside-outside calculations: "
+ nrsectors);
System.out
.println("Distance for inside-outside has been calculated as: "
+ distance);
System.out
.println("Corrected number of jobs in first row of inside-outside: "
+ nrdivisions);
System.out
.println("Total number of inside-outside jobs to generate: "
+ nrsectors);
}
int interestingpoints = length * length / 2;
int totalpoints = nrsectors * distance * distance;
int extrapoints = totalpoints - interestingpoints;
float fraction = (float) extrapoints * 100 / interestingpoints;
if(verbose){
System.out.println("Divisions = " + nrdivisions
+ ", Interesting points: " + interestingpoints
+ ", Extra points: " + extrapoints + ", Total points: "
+ totalpoints + ", " + "Fractional extra: " + fraction + "%");
}
if (verbose) {
System.out.println("Generating sectors: ");
}
act.setCurrentActivity("Applying grammar: generating sectors");
Sector top = SectorGenerator.GenerateSectors(nrsectors, distance,
nrdivisions, length,diffbp);
final Master master = new Master(top, prob);
// sector: set basepairs (phylogenetic probabilities) for inside sectors
PointRes number = new PointRes(0, 0);
for (int i = 0; i < length; i++) {
for (int j = 0; j < length - i; j++) {
Sector sectoro = findSector(i, j, master.bottom);
int so = findPointST(i, j, sectoro, distance)[0];
int to = findPointST(i, j, sectoro, distance)[1];
number.setToDouble(probmatrix[i][i + j]);
sectoro.setBasePairs(so, to, number);
}
}
if(diffbp&&probmatrix2!=null){
//if differentiating inner and outerbasepairs, create a new bp matrix per sector
for (int i = 0; i < length; i++) {
for (int j = 0; j < length - i; j++) {
Sector sectoro = findSector(i, j, master.bottom);
int so = findPointST(i, j, sectoro, distance)[0];
int to = findPointST(i, j, sectoro, distance)[1];
number.setToDouble(probmatrix2[i][i + j]);
sectoro.setBasePairs2(so, to, number);
}
}
}
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
act.setCurrentActivity("Applying grammar: inside algorithm");
if (verbose) {
System.out.println("Calculating inside values... ");
}
final AtomicInteger finishedinsidejobscount = new AtomicInteger(0); // counts
// how many inside jobs are done
master.CreateInsideJobChannel();
Progress insideAct = act.getChildProgress(0.32);
//final long gridstarttime = System.nanoTime();
while (master.unProcessedInsideSectors()) {
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
CYKJob cYKJob = master.takeNextInsideJob(); // This call will block
// until a job is ready
final Progress jobAct = insideAct.getChildProgress(1.0 / nrsectors);
// System.out.println(cYKJob.sectorid + " " + " 0 " +
// (System.nanoTime()-starttime));
final int sectorNumber = cYKJob.getSectorid();
executor.startExecution(cYKJob, new JobListener() {
@Override
public void jobFinished(JobResults result) {
master.setInsideResult(sectorNumber, result);
jobAct.setProgress(1.0);
// System.out.println(sectorNumber + " " + " 1 " +
// (System.nanoTime()-starttime));
finishedinsidejobscount.incrementAndGet();
}
@Override
public void jobFinished(double[][] result) {
}// doesn't happen here
@Override
public void jobFinished(List<ResultBundle> result) {
} // doesn't happen here
});
}
// wait for last job to finish
while (finishedinsidejobscount.get() < nrsectors) {
Thread.sleep(100);
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
}
insideAct.setProgress(1.0);
if(verbose){
System.out.println("Top inside: "
+ master.top.getInsideMatrixS().getProb(distance - 1,
length - 1 - master.top.pos[1]));
}
//long insidetime = System.nanoTime();
//System.out.print(nrdivisions + " - ");
//System.out.println("TOTAL TIME ELAPSED IN INSIDE PART (ALL): "
// + (insidetime - starttime) * 1e-9 + " seconds ");
//double gridtime1 = (insidetime - gridstarttime) * 1e-9;
//System.out.println("TOTAL TIME ELAPSED IN INSIDE PART (DISTRIBUTED): "
// + gridtime1 + " seconds ");
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
act.setCurrentActivity("Applying grammar: outside algorithm");
if (verbose) {
System.out.println("Calculating outside values...");
}
master.CreateOutsideJobChannel();
final AtomicInteger finishedoutsidejobscount = new AtomicInteger(0); // counts
// how many outside jobs are done
// outside algorithm
// set basepairs for outside algorithm (they are shifted relative to
// inside algo)
number.setToFloat(0);
for (int i = 0; i < length; i++) {
for (int j = 0; j < length - i; j++) {
Sector sectoro = findSector(i, j, master.bottom);
int so = findPointST(i, j, sectoro, distance)[0];
int to = findPointST(i, j, sectoro, distance)[1];
if (i > 0 && (j + i + 1) < length) {
number.setToDouble(probmatrix[i - 1][i + j + 1]);
} else if (i == 0) {
number.setToFloat(0);
} else if (j + i + 1 == length) {
number.setToFloat(0);
}
sectoro.setBasePairs(so, to, number);
}
}
if(diffbp){
for (int i = 0; i < length; i++) {
for (int j = 0; j < length - i; j++) {
Sector sectoro = findSector(i, j, master.bottom);
int so = findPointST(i, j, sectoro, distance)[0];
int to = findPointST(i, j, sectoro, distance)[1];
if (i > 0 && (j + i + 1) < length) {
number.setToDouble(probmatrix2[i - 1][i + j + 1]);
} else if (i == 0) {
number.setToFloat(0);
} else if (j + i + 1 == length) {
number.setToFloat(0);
}
sectoro.setBasePairs2(so, to, number);
}
}
}
//final long outsidegridstarttime = System.nanoTime();
Progress outsideAct = act.getChildProgress(0.50);
while (master.unProcessedOutsideSectors()) {
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
final Progress jobAct = outsideAct
.getChildProgress(1.0 / nrsectors);
CYKJob cYKJob = master.takeNextOutsideJob(); // This call will block
// until a job is
// ready
final int sectorNumber = cYKJob.getSectorid();
executor.startExecution(cYKJob, new JobListener() {
// System.out.println(sectorNumber + " " + " 2 " +
// (System.nanoTime()-starttime));
@Override
public void jobFinished(List<ResultBundle> result) {
} // doesn't happen here
@Override
public void jobFinished(JobResults result) {
master.setOutsideResult(sectorNumber, result);
jobAct.setProgress(1.0);
// System.out.println(sectorNumber + " " + " 3 " +
// (System.nanoTime()-starttime));
finishedoutsidejobscount.incrementAndGet();
}
@Override
public void jobFinished(double[][] result) {
}// doesn't happen here
});
}
// wait for last job to finish
while (finishedoutsidejobscount.get() < nrsectors) {
Thread.sleep(100);
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
}
outsideAct.setProgress(1.0);
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
// long outsidetime = (System.nanoTime());
// System.out.print(nrdivisions + " - ");
// System.out.println("TOTAL TIME ELAPSED IN OUTSIDE PART: "
// + (outsidetime - insidetime) * 1e-9 + " seconds ");
// double gridtime2 = (outsidetime - outsidegridstarttime) * 1e-9;
// System.out.println("TOTAL TIME ELAPSED IN OUTSIDE PART (DISTRIBUTED): "
// + gridtime2 + " seconds ");
if (verbose) {
System.out.println("Setting basepairs...");
}
act.setCurrentActivity("Applying grammar: setting basepairs");
number.setToFloat(0);
PointRes number2 = new PointRes(0, 0);
//Expected rule frequencies
PointRes EfLdFd = new PointRes(0,0);
PointRes EfFdFd = new PointRes(0,0);
PointRes EfSL = new PointRes(0,0);
PointRes EfSLS = new PointRes(0,0);
PointRes EfLs = new PointRes(0,0);
PointRes EfFLS = new PointRes(0,0);
//Expected nonterminal frequencies (for probability reestimation)
PointRes EfS = new PointRes(0,0);
PointRes EfL = new PointRes(0,0);
PointRes EfF = new PointRes(0,0);
PointRes topInsideS = new PointRes(0,0);
topInsideS.copyFrom(master.top.getInsideMatrixS().fetchProb(
distance - 1, length - 1 - master.top.pos[1], tmp));
PointRes insideval = new PointRes(0,0);
PointRes outsideval = new PointRes(0,0);
PointRes entropyLikelihood = new PointRes(0,0);
//MatrixTools.print(probmatrix);
// calculate basepair probabilities & basepair rule entropies
for (int i = 0; i < length; i++) {
for (int j = 1; j < length - i; j++) {
Sector sectoro = findSector(i, j, master.bottom);
Sector sectori = findSector(i + 1, j - 2, master.bottom);
int so = findPointST(i, j, sectoro, distance)[0];
int to = findPointST(i, j, sectoro, distance)[1];
int si = findPointST(i + 1, j - 2, sectori, distance)[0];
int ti = findPointST(i + 1, j - 2, sectori, distance)[1];
//L->dFd
number.copyFrom(sectoro.getOutsideMatrixL().fetchProb(so, to,
tmp));
number.multiply(sectori.getInsideMatrixF().fetchProb(si, ti,
tmp), prob[1][0]);
if(diffbp){
number.multiply(probmatrix2[i][i+j]);
}else{
number.multiply(probmatrix[i][i+j]);
}
EfLdFd.add(number);
number.divide(topInsideS);
//F->dFd
number2.copyFrom(sectoro.getOutsideMatrixF().fetchProb(so, to,
tmp));
number2.multiply(sectori.getInsideMatrixF().fetchProb(si, ti,
tmp), prob[2][0]);
number2.multiply(probmatrix[i][i+j]);
EfFdFd.add(number2);
number2.divide(topInsideS);
//Sum of expectation of the two rules is expectation of basepairing rule
number.add(number2);
sectoro.setBasePairs(so, to, number);
if(number.toFloat()>1){
if(verbose){
System.err.println("Warning: Pd(" + i + ", " + j +
") = " + number + " > 1! (Using 1...)" );
}
number.setToFloat(1);
}
if(probmatrix[i][i+j]!=0){
number.multiply(log2(1/probmatrix[i][i+j]));
//System.out.println(i + ", " + (i+j) + " pair entropy: " + number);
entropyLikelihood.add(number);
}
//System.out.print(number.toFloat() + " ");
//System.out.format("%16.8e", number.toDouble());
}
}
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out.println("Calculating single base probabilities...");
}
// calculate single basepairing
final PointRes one = new PointRes(1);
final PointRes minusone = new PointRes(-1);
final PointRes zero = new PointRes(0);
for (int i = 0; i < length; i++) {
PointRes singlebaseprobpoint = new PointRes(0);
for (int k = 1; k < length - i; k++) { // pairs to the right
Sector basepairsector = findSector(i, k, master.bottom);
int[] basepairpoint = findPointST(i, k, basepairsector,
distance);
singlebaseprobpoint.add(basepairsector.getMatrixBpVal(
basepairpoint[0], basepairpoint[1], tmp));
}
for (int k = 0; k < i; k++) { // pairs to the left
Sector basepairsector = findSector(k, i - k, master.bottom);
int[] basepairpoint = findPointST(k, i - k, basepairsector,
distance);
singlebaseprobpoint.add(basepairsector.getMatrixBpVal(
basepairpoint[0], basepairpoint[1], tmp));
}
tmp.copyFrom(minusone);
singlebaseprobpoint.multiply(tmp);
tmp.copyFrom(one);
tmp.add(singlebaseprobpoint);
singlebaseprobpoint.copyFrom(tmp);
if (singlebaseprobpoint.isSignificantlyLessThanZero()) {
System.err.println("Illegal Ps(" + i + ") = "
+ singlebaseprobpoint
+ ": (significantly less than zero); using 0");
singlebaseprobpoint.setToFloat(0);
}
Sector sec = findSector(i, 0, master.bottom);
int[] sbasepoint = findPointST(i, 0, sec, distance);
sec.setBasePairs(sbasepoint[0], sbasepoint[1], singlebaseprobpoint);
if(probmatrix[i][i]!=0){
singlebaseprobpoint.multiply(log2(1/probmatrix[i][i]));
entropyLikelihood.add(singlebaseprobpoint);
}
//System.out.format("%16.8e", singlebaseprobpoint.toDouble());
}
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if(entropycalc){
for (int i = 0; i < length; i++) {
//Entropy for L->s
Sector sector = findSector(i,0, master.bottom);
int s = findPointST(i,0,sector,distance)[0];
int t = findPointST(i,0,sector,distance)[1];
number.setToDouble(probmatrix[i][i]);
outsideval.copyFrom(sector.getOutsideMatrixL().fetchProb(s, t, tmp));
number.multiply(outsideval);
number.multiply(prob[1][1]);
//number.divide(topInsideS);
EfLs.add(number);
for (int j = 0; j < length - i; j++) {
Sector sectoro = findSector(i, j, master.bottom);
int so = findPointST(i, j, sectoro, distance)[0];
int to = findPointST(i, j, sectoro, distance)[1];
insideval.copyFrom(sectoro.getInsideMatrixS().fetchProb(so,to,tmp));
outsideval.copyFrom(sectoro.getOutsideMatrixS().fetchProb(so, to,tmp));
outsideval.multiply(insideval);
EfS.add(outsideval);
insideval.copyFrom(sectoro.getInsideMatrixL().fetchProb(so,to,tmp));
outsideval.copyFrom(sectoro.getOutsideMatrixL().fetchProb(so, to,tmp));
outsideval.multiply(insideval);
EfL.add(outsideval);
insideval.copyFrom(sectoro.getInsideMatrixF().fetchProb(so,to,tmp));
outsideval.copyFrom(sectoro.getOutsideMatrixF().fetchProb(so, to,tmp));
outsideval.multiply(insideval);
EfF.add(outsideval);
insideval.copyFrom(sectoro.getInsideMatrixL().fetchProb(so,to,tmp));
outsideval.copyFrom(sectoro.getOutsideMatrixS().fetchProb(so, to,tmp));
outsideval.multiply(insideval);
outsideval.multiply(prob[0][1]);
//outsideval.divide(topInsideS);
EfSL.add(outsideval);
}
}
// calculate bifurcation probabilities NOTE O(n^3) complexity
/*PointRes insidevalue1 = new PointRes(0,0);
PointRes insidevalue2 = new PointRes(0,0);
for (int i = 0; i < length; i++) {
for (int j = 0; j < length - i; j++) {
for(int k = 0; k<j; k++){
//i+k is between i and i+j.
Sector sectoro = findSector(i, j, master.bottom);
Sector sectori1 = findSector(i, k, master.bottom);
Sector sectori2 = findSector(i+k+1, j-k-1, master.bottom);
//System.out.println(i + " " + (i+k) + " " + (i+j));
//Outside region (i,i+j)
int so = findPointST(i, j, sectoro, distance)[0];
int to = findPointST(i, j, sectoro, distance)[1];
//Inside region (i,i+k)
int si1 = findPointST(i, k, sectori1, distance)[0];
int ti1 = findPointST(i, k, sectori1, distance)[1];
//Inside region (i+k+1, j)
int si2 = findPointST(i+k+1, j-k-1, sectori2, distance)[0];
int ti2 = findPointST(i+k+1, j-k-1, sectori2, distance)[1];
//Common to both bifurcation rules
insidevalue1.copyFrom(sectori1.getInsideMatrixL().fetchProb(si1,ti1,tmp));
insidevalue2.copyFrom(sectori2.getInsideMatrixS().fetchProb(si2,ti2,tmp));
//S->LS: prob[0][0]
number.copyFrom(sectoro.getOutsideMatrixS().fetchProb(so, to, tmp));
number.multiply(insidevalue1);
number.multiply(insidevalue2);
number.multiply(prob[0][0]);
//number.divide(topInsideS);
EfSLS.add(number);
//F->LS: prob[2][1]
number.copyFrom(sectoro.getOutsideMatrixF().fetchProb(so, to, tmp));
number.multiply(insidevalue1);
number.multiply(insidevalue2);
number.multiply(prob[2][1]);
//number.divide(topInsideS);
EfFLS.add(number);
}
}
}
*/
//This is to save O(n^3) execution time
EfSLS.copyFrom(EfS);
EfSLS.subtract(EfSL,tmp);
EfFLS.copyFrom(EfF);
EfFLS.subtract(EfFdFd,tmp);
EfS.divide(topInsideS);
EfL.divide(topInsideS);
EfF.divide(topInsideS);
EfSLS.divide(topInsideS);
EfSL.divide(topInsideS);
EfLdFd.divide(topInsideS);
EfLs.divide(topInsideS);
EfFLS.divide(topInsideS);
EfFdFd.divide(topInsideS);
PointRes entropySLS = new PointRes(EfSLS);
PointRes entropySL = new PointRes(EfSL);
PointRes entropyLdFd = new PointRes(EfLdFd);
PointRes entropyLs = new PointRes(EfLs);
PointRes entropyFdFd = new PointRes(EfFdFd);
PointRes entropyFLS = new PointRes(EfFLS);
if(verbose){
System.out.println("Expected nonterminal freq S = " + EfS.toDouble() );
System.out.println("Expected nonterminal freq L = " + EfL.toDouble() );
System.out.println("Expected nonterminal freq F = " + EfF.toDouble() );
System.out.println("Expected rule freq S->LS = " + EfSLS.toDouble() );
System.out.println("Expected rule freq S->L = " + EfSL.toDouble() );
System.out.println("Expected rule freq L->dFd = " + EfLdFd.toDouble() );
System.out.println("Expected rule freq L->s = " + EfLs.toDouble() );
System.out.println("Expected rule freq F->LS = " + EfFLS.toDouble() );
System.out.println("Expected rule freq F->dFd = " + EfFdFd.toDouble() );
}
//EfSLS.divide(EfS);
EfSL.divide(EfS);
EfSLS.setToDouble(1-EfSL.toDouble());
EfLdFd.divide(EfL);
EfLs.divide(EfL);
EfFdFd.divide(EfF);
//EfFLS.divide(EfF);
EfFLS.setToDouble(1-EfFdFd.toDouble());
if(verbose){
System.out.println("Reestimation prob S->LS = " + EfSLS.toDouble() );
System.out.println("Reestimation prob S->L = " + EfSL.toDouble() );
System.out.println("Reestimation prob L->dFd = " + EfLdFd.toDouble() );
System.out.println("Reestimation prob L->s = " + EfLs.toDouble() );
System.out.println("Reestimation prob F->LS = " + EfFLS.toDouble() );
System.out.println("Reestimation prob F->dFd = " + EfFdFd.toDouble() );
}
/*
System.out.println("CHECK: Sum of rules from nonterminals:");
System.out.println("S: " + (float)( EfSLS.toDouble() + EfSL.toDouble() ));
System.out.println("L: " + (float)( EfLdFd.toDouble() + EfLs.toDouble() ));
System.out.println("F: " + (float)( EfFdFd.toDouble() + EfFLS.toDouble() ));
*/
entropySLS.multiply(new PointRes(log2(1/prob[0][0])));//.divide(LogTwo);
entropySL.multiply(new PointRes(log2(1/prob[0][1])));//.divide(LogTwo);
entropyLs.multiply(new PointRes(log2(1/prob[1][1])));//.divide(LogTwo);
entropyFLS.multiply(new PointRes(log2(1/prob[2][1])));//.divide(LogTwo);
entropyLdFd.multiply(new PointRes(log2(1/prob[1][0])));//.divide(LogTwo);
entropyFdFd.multiply(new PointRes(log2(1/prob[2][0])));//.divide(LogTwo);
PointRes entropy = new PointRes(entropySLS);
entropy.add(entropySL);
entropy.add(entropyLdFd);
entropy.add(entropyLs);
entropy.add(entropyFLS);
entropy.add(entropyFdFd);
if(verbose){System.out.println("Rule entropy component: " + entropy);}
PointRes logTopInsideS = new PointRes(topInsideS);
logTopInsideS.takeLog2();
//logTopInsideS.multiply(-1);
entropy.add(logTopInsideS);
if(verbose){System.out.println("Log top inside S component: " + logTopInsideS);}
//System.out.println("Log top inside S + rule entropy: " + entropy);
if(verbose){System.out.println("Likelihood entropy component: " + entropyLikelihood);}
entropy.add(entropyLikelihood);
System.out.println("ENTROPY: " + entropy + " = " + entropy.toDouble());
double maxentropy_nr = 0.142- 1.5*Math.log(length)/Math.log(2) + 1.388*length;
double percent_nr = (entropy.toDouble()/maxentropy_nr)*100;
System.out.format("(which is %2.3f%s of the maximum entropy, %2.2f) \n", percent_nr, "%", maxentropy_nr);
}
if (verbose) {
System.out.println("Cleaning memory...");
}
// assign null's to all inside-outside to enforce clearing of memory in
// case it didn't happen
Sector thissec = master.bottom;
thissec.clearAllInside();
thissec.clearAllOutside();
while (thissec.next != null) {
thissec = thissec.next;
thissec.clearAllInside();
thissec.clearAllOutside();
}
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out.println("Calculating expectation values...");
}
act.setCurrentActivity("Applying grammar: setting expectation values");
// calculate expectation values
master.CreateExpectationJobChannel();
final AtomicInteger finishedexpectationjobscount = new AtomicInteger(0); // counts
// how many exp jobs are done
Progress expectAct = act.getChildProgress(0.11);
//final long expgridstarttime = System.nanoTime();
while (master.unProcessedExpectationSectors()) {
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
final Progress jobAct = expectAct.getChildProgress(1.0 / nrsectors);
CYKJob cYKJob = master.takeNextExpectationJob(); // This call will
// block until a job is ready
// System.out.println(cYKJob.sectorid + " " + " 4 " +
// (System.nanoTime()-starttime));
final int sectorNumber = cYKJob.getSectorid();
executor.startExecution(cYKJob, new JobListener() {
@Override
public void jobFinished(JobResults result) {
master.setExpectationResult(sectorNumber, result);
jobAct.setProgress(1.0);
// System.out.println(sectorNumber + " " + " 5 " +
// (System.nanoTime()-starttime));
finishedexpectationjobscount.incrementAndGet();
}
@Override
public void jobFinished(double[][] result) {
}// doesn't happen here
@Override
public void jobFinished(List<ResultBundle> result) {
} // doesn't happen here
});
}
// wait for last job to finish
while (finishedexpectationjobscount.get() < nrsectors) {
Thread.sleep(100);
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
}
expectAct.setProgress(1.0);
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if(verbose){
System.out.println("Top expectation: "
+ master.top.getExpectationMatrix().getProb(distance - 1,
length - 1 - master.top.pos[1]));
}
// System.out.print(nrdivisions + " - ");
//System.out.println("TOTAL TIME ELAPSED IN EXPECTATION PART: "
// + (System.nanoTime() - outsidetime) * 1e-9 + " seconds ");
//double gridtime3 = (System.nanoTime() - expgridstarttime) * 1e-9;
//System.out
// .println("TOTAL TIME ELAPSED IN EXPECTATION PART (DISTRIBUTED): "
// + gridtime3 + " seconds ");
if (verbose) {
System.out.println("Backtracking to find structure...");
}
act.setCurrentActivity("Applying grammar: backtracking");
char[] structure = new char[length];
float[] reliability = new float[length];
LinkedList<int[]> checkPoint = new LinkedList<int[]>();
int[] startpoint = new int[2];
startpoint[0] = 0;
startpoint[1] = length - 1;
checkPoint.addFirst(startpoint);
PointRes expectationvalue = new PointRes(0, 0);
PointRes otherexpectationvalue = new PointRes(0, 0);
PointRes thirdexpectationvalue = new PointRes(0, 0);
PointRes basepairvalue = new PointRes(0, 0);
PointRes tmp2 = new PointRes(0, 0);
//int[] pairing = new int[length]; //this creates the pairing array
//for(int i = 0; i<length; i++){
// pairing[i] = -1;
//}
while (!checkPoint.isEmpty()) {
// Get the first element from the LinkedList.
int[] point = checkPoint.removeFirst();
// Check if this point is a single base.
Sector sec = findSector(point[0], point[1] - point[0],
master.bottom);
int[] stpoint = findPointST(point[0], point[1] - point[0], sec,
distance);
expectationvalue.copyFrom(sec.getMatrixExpectationTpVal(stpoint[0],
stpoint[1], tmp));
basepairvalue.copyFrom(sec.getMatrixBpVal(stpoint[0], stpoint[1],
tmp));
if (point[0] == point[1]) {
structure[point[0]] = '.';
reliability[point[0]] = basepairvalue.toFloat();
continue;
}
// Check if this point is paired.
Sector othersec = findSector(point[0] + 1, point[1] - 1
- (point[0] + 1), master.bottom);
int[] otherstpoint = findPointST(point[0] + 1, point[1] - 1
- (point[0] + 1), othersec, distance);
otherexpectationvalue.copyFrom(othersec.getMatrixExpectationTpVal(
otherstpoint[0], otherstpoint[1], tmp));
tmp.copyFrom(basepairvalue);
tmp.multiply(2);
tmp2.copyFrom(otherexpectationvalue);
tmp2.add(tmp);
if (expectationvalue.equals(tmp2)) {
// do the pairing
structure[point[0]] = '(';
structure[point[1]] = ')';
//pairing[point[0]] = point[1];
//pairing[point[1]] = point[0];
reliability[point[0]] = basepairvalue.toFloat();
reliability[point[1]] = basepairvalue.toFloat();
// push next point in the list
int[] nextpoint = new int[2];
nextpoint[0] = point[0] + 1;
nextpoint[1] = point[1] - 1;
checkPoint.addFirst(nextpoint);
continue;
} else { // if it isn't basepaired then it is bifuricated; find the
// two points that gave the value.
for (int k = point[0]; k < point[1]; k++) { // "k from i to j-1"
othersec = findSector(point[0], k - point[0], master.bottom);
otherstpoint = findPointST(point[0], k - point[0],
othersec, distance);
otherexpectationvalue.copyFrom(othersec
.getMatrixExpectationTpVal(otherstpoint[0],
otherstpoint[1], tmp));
Sector thirdsec = findSector(k + 1, point[1] - (k + 1),
master.bottom);
int[] thirdpoint = findPointST(k + 1, point[1] - (k + 1),
thirdsec, distance);
thirdexpectationvalue.copyFrom(thirdsec
.getMatrixExpectationTpVal(thirdpoint[0],
thirdpoint[1], tmp));
tmp.copyFrom(otherexpectationvalue);
tmp.add(thirdexpectationvalue);
if (expectationvalue.equals(tmp)) {
int[] nextpoint1 = new int[2];
nextpoint1[0] = point[0];
nextpoint1[1] = k;
int[] nextpoint2 = new int[2];
nextpoint2[0] = k + 1;
nextpoint2[1] = point[1];
checkPoint.addFirst(nextpoint2);
checkPoint.addFirst(nextpoint1);
break;
}
}
continue;
}
}
//System.out.println();
//for(int i = 0; i<length; i++){
// System.out.format("%08d", pairing[i]);
//}
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out.println("Finalizing results... ");
}
act.setCurrentActivity("Applying grammar: finalizing");
// copy basepairs to square matrix
float[][] expectationvalues = new float[length][length];
float[][] basepairprob = new float[length][length];
float[] singlebaseprob = new float[length];
for (int j = 1; j < length; j++) {
for (int i = 0; i < length - j; i++) {
Sector sectoro = findSector(i, j, master.bottom);
int so = sectoro.pos[0] + distance - i - 1;
int to = j + i - (sectoro.pos[1] + sectoro.pos[0]);
tmp.copyFrom(sectoro.getMatrixBpVal(so, to, tmp2));
basepairprob[i][i + j] = tmp.toFloat();
basepairprob[i + j][i] = basepairprob[i][i + j];
// make it symmetric
tmp.copyFrom(sectoro.getMatrixExpectationTpVal(so, to, tmp2));
expectationvalues[i][i + j] = tmp.toFloat();
expectationvalues[i + j][i] = expectationvalues[i][i + j];
// make it symmetric
}
}
for (int i = 0; i < length; i++) {
Sector sec = findSector(i, 0, master.bottom);
int s = sec.pos[0] + distance - i - 1;
int t = i - (sec.pos[1] + sec.pos[0]);
tmp.copyFrom(sec.getMatrixBpVal(s, t, tmp2));
singlebaseprob[i] = tmp.toFloat();
tmp.copyFrom(sec.getMatrixExpectationTpVal(s, t, tmp2));
expectationvalues[i][i] = tmp.toFloat();
}
// clean basepairing matrices
thissec = master.bottom;
thissec.clearAllBp();
while (thissec.next != null) {
thissec = thissec.next;
thissec.clearAllBp();
thissec.clearAllExpectation();
}
ResultBundle result = new ResultBundle();
result.structure = structure;
result.reliability = reliability;
result.basepairprob = basepairprob;
result.singlebaseprob = singlebaseprob;
result.expectationvalues = expectationvalues;
result.entropyVal = entropyVal;
result.entropyPercOfMax = entropyPercOfMax;
result.entropyMax = entropyMax;
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
// System.out.print(nrdivisions + " - ");
System.out.println("TOTAL TIME ELAPSED IN SCFG PART: "
+ (int)((System.nanoTime() - starttime) * 1e-9) + " seconds ");
// System.out.println(" ...of which distributed: "
// + (gridtime1 + gridtime2 + gridtime3) + " seconds ");
//System.out.println();
act.setProgress(1.0);
return result;
}
private static double[][] createPhyloProb(Progress act, int userjobsnr,
Tree tree, List<char[]> columns_char, List<String> names,
final int length, Parameters param,
AsynchronousJobExecutor executor, final boolean verbose, int execnr)
throws InterruptedException {
final long starttime = System.nanoTime();
if(verbose){
System.out.println("Timer (phylogeny) started. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
}
if (verbose) {
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out.println("Processing input...");
}
if (verbose) {
act.setCurrentActivity("Evolutionary model: processing input");
}
List<int[]> columns = new ArrayList<int[]>();
for (int i = 0; i < columns_char.size(); i++) {
columns.add(MatrixTools.convertColumn(columns_char.get(i)));
}
if (verbose) {
System.out.println("User wish for number of divisions: "
+ userjobsnr);
}
// correct user input
if (userjobsnr > length) {
userjobsnr = length;
} else if (userjobsnr < 1) {
userjobsnr = 1;
}
int nrjobs = userjobsnr;
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out
.println("Actual nr. of divisions in phylogenetic calculations: "
+ nrjobs);
}
if (verbose) {
System.out.println("Generating matrices for each node...");
}
tree.generateLeafList(names); // to speed up calculations later
final double[][] probmatrix = new double[length][length];
List<PhyloJob> jobs = new ArrayList<PhyloJob>();
// First generate "exp(Rt)" matrix for each node.
// This is the same for all columns of that node.
tree.getRoot().calculateChildrenMatrix(param.getSD(), param.getSV(),
param.getSV1());
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out.println("Generating jobs...");
}
if (verbose) {
act.setCurrentActivity("Evolutionary model: dividing tasks");
}
// Create jobs for SINGLE columns
// now all single columns are in one job
// Add the only single-column job
int colcnt = 0;
PhyloJob lastjob = new PhyloJob();
lastjob.tree = Tree.copyTree(tree); // must copy the entire tree into
// each phylojob
lastjob.names = names; // must copy the names into each phylojob (for
// debugging only)
lastjob.startcol = 0;
lastjob.jobid = 0;
lastjob.endcol = length - 1;
lastjob.type = false;
lastjob.param = param;
for (int col = 0; col < length; col++) {
// send columns
lastjob.columns.add(columns.get(col));
colcnt++;
}
jobs.add(lastjob);
// Create jobs for column PAIRS
// count total number of column pairs
int paircnt = 0;
for (int i = 0; i < length; i++) {
for (int j = i + 1; j < length; j++) {
paircnt++;
}
}
if(verbose){
System.out.println("Total number of pairs: " + paircnt);
System.out.println("Pairs in a job: " + (paircnt) / nrjobs);
}
// have to calculate matrices, but only once
tree.getRoot().calculateChildrenMatrix(param.getDD(), param.getDV(),
param.getDV1());
int lastendcol = -1;
int paircolcnt = 0;
// add all jobs except last
for (int jobnr = 0; jobnr < nrjobs - 1; jobnr++) {
PhyloJob job = new PhyloJob();
job.tree = Tree.copyTree(tree); // must copy the entire tree into
// each phylojob
job.names = names; // must copy the names into each phylojob
job.type = true;
job.param = param;
job.jobid = jobnr + 1;
int col1 = lastendcol + 1;
job.startcol = col1;
paircolcnt += length - col1;
col1--;
while (paircolcnt < ((long)(jobnr + 1) * (paircnt)) / nrjobs) {
col1++;
if (col1 == length) { // no more columns left, finish
col1--;
break;
}
job.columns.add(columns.get(col1));
paircolcnt += length - col1;
}
job.endcol = col1;
lastendcol = col1;
for (int col2 = job.startcol + 1; col2 < length; col2++) {
// send pairing columns
job.columns2.add(columns.get(col2));
}
jobs.add(job);
paircolcnt -= length - col1;
}
// Add the last job
lastjob = new PhyloJob();
lastjob.tree = Tree.copyTree(tree); // must copy the entire tree into
// each phylojob
lastjob.names = names; // must copy the names into each phylojob
lastjob.startcol = lastendcol + 1;
lastjob.endcol = length;
lastjob.type = true;
lastjob.param = param;
lastjob.jobid = nrjobs;
for (int col1 = lastjob.startcol; col1 < length; col1++) {
// send columns
lastjob.columns.add(columns.get(col1));
}
for (int col2 = lastjob.startcol + 1; col2 < length; col2++) {
// send pairing columns
lastjob.columns2.add(columns.get(col2));
}
jobs.add(lastjob);
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
if (verbose) {
System.out
.println("Total number of jobs in phylogenetic calculations: "
+ jobs.size());
}
if (verbose) {
System.out.println("Executing jobs...");
}
// start executing the jobs
// counts how many phylojobs are done
final AtomicInteger finishedphylojobscount = new AtomicInteger(0);
act.setCurrentActivity("Evolutionary model: " +
"calculating column probabilities");
long gridstarttime = System.nanoTime();
// execute single column jobs
Progress singleColAct = act.getChildProgress(0.1);
for (int jobnr = 0; jobnr < 1; jobnr++) {
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
PhyloJob job = jobs.get(jobnr);
//final int jn = jobnr;
final int startcol = job.startcol;
final Progress jobAct = singleColAct
.getChildProgress((float) job.columns.size()
/ (float) colcnt);
executor.startExecution(job, new JobListener() {
@Override
public void jobFinished(JobResults result) {
} // doesn't happen here
@Override
public void jobFinished(List<ResultBundle> result) {
} // doesn't happen here
@Override
public void jobFinished(double[][] result) {
for (int col = 0; col < result.length; col++) {
probmatrix[col + startcol][col + startcol] = result[col][0];
}
finishedphylojobscount.incrementAndGet();
jobAct.setProgress(1.0);
}
});
}
Progress doubleColAct = act.getChildProgress(0.9);
// execute double column jobs
for (int jobnr = 1; jobnr < jobs.size(); jobnr++) {
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
PhyloJob job = jobs.get(jobnr);
//final int jn = jobnr;
final int startcol = job.startcol;
final int endcol = job.endcol;
final Progress jobAct = doubleColAct
.getChildProgress((float) job.columns.size()
/ (float) length);
if (job.columns2.size() != 0) {
executor.startExecution(job, new JobListener() {
@Override
public void jobFinished(JobResults result) {
} // doesn't happen here
@Override
public void jobFinished(List<ResultBundle> result) {
} // doesn't happen here
@Override
public void jobFinished(double[][] result) {
for (int col1 = startcol; col1 < endcol + 1; col1++) {
for (int col2 = col1 + 1; col2 < length; col2++) {
probmatrix[col1][col2] = result[col1 - startcol][col2
- startcol - 1];
probmatrix[col2][col1] = probmatrix[col1][col2]; // make
// it
// symmetric
}
}
finishedphylojobscount.incrementAndGet();
jobAct.setProgress(1.0);
}
});
} else {
finishedphylojobscount.incrementAndGet();
jobAct.setProgress(1.0);
}
}
// wait for last thread to finish
// wait for jobs to finish
while (finishedphylojobscount.get() < jobs.size()) {
Thread.sleep(100);
if(act.shouldStop()){
executor.shutDown();
}
act.checkStop();
}
singleColAct.setProgress(1.0);
doubleColAct.setProgress(1.0);
act.setProgress(1.0);
//act.setCurrentActivity("Evolutionary model: done");
if (verbose) {
System.out.println("Done. (time: "
+ (System.nanoTime() - starttime) * 1e-9 + " s)");
System.out.println("Memory allocated: "
+ (Runtime.getRuntime().totalMemory() / 1048576) + " MB");
System.out.println("Memory used: "
+ ((Runtime.getRuntime().totalMemory() - Runtime
.getRuntime().freeMemory()) / 1048576) + " MB ");
}
System.out.println("TOTAL TIME ELAPSED IN PHYLOGENETIC PART: "
+ (int)((System.nanoTime() - starttime) * 1e-9) + " seconds ");
if(verbose){
System.out.println(" ...of which distributed: "
+ (System.nanoTime() - gridstarttime) * 1e-9 + " seconds");
}
//System.out.println();
// Result contains the a priori probability distribution matrix.
return probmatrix;
}
static Sector findSector(int i, int j, Sector bottom) {
// finds the sector in which the point i,j is located
Sector current = bottom;
// first find vertical position
while (current.next != null) {
if (i >= current.pos[0]
&& (i < current.next.pos[0] || current.next.pos[0] == 0)) {
break;
}
current = current.next;
}
// current now has the right column
while (current.above != null) {
if (j >= current.pos[1] - (i - current.pos[0])
&& j < current.above.pos[1] - (i - current.pos[0])) {
break;
}
current = current.above;
}
return current;
}
static int[] findPointST(int i, int j, Sector sector, int distance) {
int result[] = new int[2];
// lower half of the triangle, just return whatever sector is the one
result[0] = sector.pos[0] + distance - i - 1;
result[1] = j + i - (sector.pos[1] + sector.pos[0]);
return result;
}
static int pairs(int a, int b) {
int answer = 0;
for (int c1 = 0; c1 < a; c1++) {
for (int c2 = c1 + 1; c2 < b; c2++) {
answer++;
}
}
return answer;
}
static double log2(double val){
return Math.log(val)/LOG_TWO;
}
}