package com.ppfold.main;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import com.ppfold.algo.AsynchronousJobExecutor;
import com.ppfold.algo.AsynchronousJobExecutorThreadPool;
import com.ppfold.algo.FoldingProject;
import com.ppfold.algo.FuzzyAlignment;
import com.ppfold.algo.FuzzyNucleotide;
import com.ppfold.algo.MatrixTools;
import com.ppfold.algo.NeighbourJoining;
import com.ppfold.algo.NullProgress;
import com.ppfold.algo.Parameters;
import com.ppfold.algo.Progress;
import com.ppfold.algo.ResultBundle;
import com.ppfold.algo.Tree;
import com.ppfold.algo.extradata.ExtraData;
import com.ppfold.algo.extradata.ExtraDataBars;
import com.ppfold.algo.extradata.ExtraDataProbMapping;
import com.ppfold.algo.extradata.ForcedConstraints;
public class PPfoldMain implements Runnable {
//Global variables are set by GUI or command-line arguments.
static String versionnumber = "3.0";
static String treefilename;
static String alignmentfilename;
static String exportfilehandle;
static String seqexportname = null; //if not null, export stuff specific to this sequence.
static String paramfilename;
static String paramresname = "data/ppfold-matrices.dat";
static String outputdir;
static String defaultDataDistfile = "data/ppfold-dist.dat"; //Default data file
static boolean auxdata = false;
static List<DataInfo> datainfo = new ArrayList<DataInfo>();
static boolean createtree = true;
static boolean optimizetree = true;
static boolean verbose = false;
static boolean exportson = false;
static boolean onlyCT = false;
static boolean specialname = false;
static int scfgdivisions = Runtime.getRuntime().availableProcessors()*8;
static int phylodivisions = Runtime.getRuntime().availableProcessors()*2;
static int nrprocessors = Runtime.getRuntime().availableProcessors();
static int iterlimit = 10;
static boolean entropycalc = true;
static String entropyString = null;
static boolean gui = false;
static boolean userfinished = false; //this will be triggered when the user presses the START button
static private Progress progress; //Progressbar; either NullActivity (if no GUI), or the PPfoldProgressBar (if GUI)
static public boolean shouldstop = false; //this will be triggered when user presses STOP button
static public boolean isstopping = false;
//Reset for each project
public boolean foldingfinished = false; //thread will stop when folding is finished.
public boolean success = true;
public String errormessage = ""; //if it fails, this will be the output to the user
public void cleanUp(){
//Put things here to save memory
errormessage = null;
}
@Override
public void run() {
try{
System.out.println("Attempting to read and parse files...");
System.out.println(alignmentfilename);
File alignfile = new File (alignmentfilename);
if(!specialname){
exportfilehandle = alignfile.getName();
int lastdotpos = exportfilehandle.lastIndexOf('.');
exportfilehandle = exportfilehandle.substring(0,lastdotpos);
}
Alignment align = AlignmentReader.readAlignment(alignmentfilename);
List<ExtraData> extradata = new ArrayList<ExtraData>();
BufferedReader paramFileReader = null;
Parameters param;
try {
if (paramfilename != null) {
File file = new File(paramfilename);
paramFileReader = new BufferedReader(new InputStreamReader(
new FileInputStream(file)));
}else{
paramFileReader = new BufferedReader(new InputStreamReader(Thread.currentThread().getContextClassLoader().getResourceAsStream(paramresname)));
}
param = Parameters.readParam(paramFileReader);
} catch (Exception e) {
throw new Exception("Error reading parameter file! Check that the file name and format are OK.");
}
finally {
if (paramFileReader != null) {
try {
paramFileReader.close();
}
catch (IOException e) {
throw new Exception("Error closing parameter file.");
}
}
}
Tree tree;
if(!createtree){
System.out.println(treefilename);
tree = NewickReader.readNewick(treefilename);
}
else{
tree=null;
}
if(auxdata){
for(int dataID = 0; dataID<datainfo.size(); dataID++){
ExtraData sequenceData = null;
DataInfo data = datainfo.get(dataID);
switch(data.getType()){
case 0:
BufferedInputStream shapeDistReader;
if(data.getDistFileName().equals(defaultDataDistfile)){
shapeDistReader = new BufferedInputStream(Thread.currentThread().getContextClassLoader().getResourceAsStream(defaultDataDistfile));
}
else{
shapeDistReader = new BufferedInputStream(new FileInputStream(data.getDistFileName()));
}
sequenceData = ExtraDataBars.readDistTable_toStream(shapeDistReader);
break;
case 1:
sequenceData = new ExtraDataProbMapping();
break;
case 2:
sequenceData = new ForcedConstraints();
break;
default:
throw new IllegalArgumentException("Data type could not be interpreted!");
}
//Find the sequence for which the SHAPE values are known (but only if the name string is not null)
int seqID = -1;
if(data.getSequenceName() != null){
for(int i = 0; i<align.getNames().size(); i++){
if(data.getSequenceName().trim().equals(align.getNames().get(i).trim())){
seqID = i;
data.setSequenceID(seqID);
break;
}
}
if(seqID==-1){
throw new IllegalArgumentException("Sequence with name " + data.getSequenceName() + " could not be found!");
}
}
if(data.getFileName()!=null){ //if null then only contact distance
BufferedInputStream shapeFileReader = new BufferedInputStream(new FileInputStream(data.getFileName()));
sequenceData.readData_toStream(shapeFileReader, align.calculateLength(seqID));
sequenceData.transformToAlignment(align.getSequences().get(seqID));
}
if(sequenceData instanceof ForcedConstraints && data.getContactDistance()!=-1){
((ForcedConstraints) sequenceData).setContactDistance(data.getContactDistance());
}
extradata.add(sequenceData);
}
}
System.out.println("All files read.");
if(outputdir == null){
File dir1 = new File (".");
try {
outputdir = dir1.getCanonicalPath();
} catch (IOException e1) {
System.out.println("Couldn't open directory ");
throw new Exception(e1);
}
}
System.out.println("Results will be written to " + outputdir);
System.out.println("Starting algorithm...");
if(seqexportname!=null){
System.out.println("Special sequence exported: " + seqexportname + " with prefix " + exportfilehandle + "-" + seqexportname);
}
//Generate progress if GUI does not take care of it
if(!gui){
progress = NullProgress.INSTANCE;
}
progress.setCurrentActivity("Preparing folding...");
fold(progress, align.getSequences(), align.getNames(), tree, param, extradata);
System.out.println();
System.out.println("FINISHED ");
success = true;
foldingfinished = true;
errormessage = "";
}
catch(InterruptedException e){
System.out.println("Folding stopped.");
isstopping=true;
}
catch(OutOfMemoryError e){
System.out.println("Oops! The Java Virtual Machine ran out of memory!");
System.out.println("Please increase the heap size.");
success = false;
String text = new String("Oops! The Java Virtual Machine ran out of memory! (Currently allocated: "+
(Runtime.getRuntime().totalMemory() / 1048576) + " MB.) "+
"You can increase the virtual machine heap size by adding the argument -Xmx[size]. " +
"E.g. to run PPfold with 256 MB heap size: \n\n" +
"java -Xmx256m -jar PPfold.jar \n\nFor debugging purposes, the stack trace is below. \n");
text.concat(e.toString()+":\n");
for(StackTraceElement s:e.getStackTrace()){
text = text.concat(s.toString()+"\n");
}
errormessage = text;
foldingfinished=true;
if(!gui){
e.printStackTrace();
}
}
catch(Exception e){
System.err.println("Fatal error");
success = false;
String text = new String("");
for(StackTraceElement s:e.getStackTrace()){
text = text.concat(s.toString()+"\n");
}
errormessage += e.toString()+":\n"+text;
foldingfinished=true;
//if(!gui){
// e.printStackTrace();
//}
}
}
public static ResultBundle fold(Progress progress, List<String> sequences, List<String> names, Tree tree,
Parameters param, List<ExtraData> extradata) throws InterruptedException, Exception{
//PPfoldResult ppfoldResult = new PPfoldResult();
AsynchronousJobExecutor executor = new AsynchronousJobExecutorThreadPool(nrprocessors);
progress.setProgress(0.0);
List<char[]> columns = new ArrayList<char[]>();
//each element represents a *column* of the alignment
List<char[]> fullcolumns = new ArrayList<char[]>();
//this will contain the FULL alignment (for outputting later)
int nrseq = sequences.size();
char[] thiscolumn = new char[nrseq];
char[] finalstructure = new char[sequences.get(0).length()];
float[] finalreliability = new float[sequences.get(0).length()];
float[][] finalmatrix = new float[sequences.get(0).length()][sequences.get(0).length()];
float[][] finalexp = new float[sequences.get(0).length()][sequences.get(0).length()];
List<Integer> leftoutcolumns = new ArrayList<Integer>();
if(!entropycalc||entropyString==null||(entropyString!=null&&entropyString.trim().equals(""))){
for(int i = 0; i < sequences.get(0).length(); i++){
//iterates column number
int gapscounter=0;
for(int j=0; j<nrseq; j++){
//iterates sequences
thiscolumn[j]=(char) sequences.get(j).charAt(i);
if(MatrixTools.isGap(thiscolumn[j])){gapscounter++;}
//System.out.println("Calculating column nr. " + j + " taking from seq " + j + " charAt " + i + " which is " + thiscolumn[j]);
}
boolean isoktoremove = true;
fullcolumns.add(thiscolumn.clone());
//May only remove a column if it is not associated with exp data.
isoktoremove = true;
if(!extradata.isEmpty()){
for(int k = 0; k<extradata.size(); k++){
if(!extradata.get(k).isEmpty(i)){
isoktoremove = false;
break;
}
}
}
if((float)gapscounter/nrseq >= 0.25 && isoktoremove){
finalstructure[i] = '.';
finalreliability[i] = 0;
leftoutcolumns.add(i);
//System.out.println("Leaving out column: " + i);
}
else{
columns.add(thiscolumn.clone());
}
}
}
else{
//If entropy is calculated after a specific sequence, then remove columns where there are gaps in
//other sequences
System.out.println("Removing gaps after sequence name: " + entropyString);
//Find the sequence:
int id = -1;
for(id = 0; id<names.size(); id++){
if(entropyString.trim().equals(names.get(id).trim())){
break;
}
}
if(id==-1){
throw new Exception("Sequence with name " + entropyString + " could not be found!");
}
//Sequences.get(id) is the one that shouldn't have gaps
for(int i = 0; i < sequences.get(0).length(); i++){
//iterates column number
for(int j=0; j<nrseq; j++){
//iterates sequences
thiscolumn[j]=(char) sequences.get(j).charAt(i);
}
fullcolumns.add(thiscolumn.clone());
//Remove column
if(MatrixTools.isGap(sequences.get(id).charAt(i))){
finalstructure[i] = '.';
finalreliability[i] = 0;
leftoutcolumns.add(i);
}
else{
//Keep column.
columns.add(thiscolumn.clone());
}
}
}
//Remove left out columns from Extra data as well
for(ExtraData extrad:extradata){
extrad.removeColumns(leftoutcolumns);
}
System.out.println("Total number of columns predicting structure on: " + columns.size());
//Convert character array columns to integer array columns
List<int[]> columns_int = new ArrayList<int[]>();
for (int i = 0; i < fullcolumns.size(); i++) {
columns_int.add(MatrixTools.convertColumn(fullcolumns.get(i)));
}
Progress activity = progress.getChildProgress(0.05);
if(tree==null){
try {
//System.out.println("Creating tree by neighbour joining... ");
tree = NeighbourJoining.generateTreeNJ(activity, sequences,columns_int, names, param);
//tree.print();
}catch (InterruptedException e) {
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
}
activity.setProgress(1.0);
System.out.println("Checking data...");
boolean isInputOK = FoldingProject.checkInput(tree, columns, names);
if(!isInputOK){
throw new IllegalArgumentException("There are problems with the input. Check that: " +
"\n - Alignment rows all have the same length. " +
"\n - The names of nodes in the tree match the names in the alignment. " +
"\n - There are no illegal characters in any of the sequences.");
}
System.out.println("Checking finished");
Progress activity2 = progress.getChildProgress(0.05);
if(optimizetree){
//System.out.println("Optimizing branch lengths...");
try {
tree.optimizeBranchLengths(activity2, columns_int,fullcolumns,names,param, iterlimit);
} catch (InterruptedException e) {
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
//if(!onlyCT){
//ExportTools.writeTree(outputdir, exportfilehandle, ".newick", tree);
//}
tree.print();
}
activity2.setProgress(1.0);
//Leave 2% for outputting
Progress activity3 = progress.getChildProgress(0.88);
ResultBundle result = null;
try{
result = FoldingProject.fold(activity3, phylodivisions,scfgdivisions, tree,
columns, names, param, executor, verbose,1,extradata,false,entropycalc);
}
catch(InterruptedException e){
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
activity3.setProgress(1.0);
//ExportTools.writeCTFormat(outputdir, exportfilehandle+"_RED", ".ct", columns, result.getStructure(), result.getReliability());
//ExportTools.writeStructureReliability(outputdir, exportfilehandle+"_RED", ".st", result.getStructure(), result.getReliability());
System.out.println("Structure prediction complete. Transforming to include left-out columns " +
"(total " + leftoutcolumns.size() + ")...");
progress.setCurrentActivity("Finalizing...");
int icolcnt = 0;
for(int i = 0; i<result.getBasePairProb().length; i++){
while(leftoutcolumns.indexOf(i+icolcnt)!=-1){
// System.out.println("i = " + i);
icolcnt++;
finalmatrix[i][i] = 0;
finalexp[i][i] = 0;
//continue;
}
//System.out.println("probmatrix: " + i + ", finalmatrix coord: " + (i+icolcnt));
int jcolcnt = 0;
for(int j = 0; j<result.getBasePairProb().length; j++){
//System.out.println("j = " + j);
while(leftoutcolumns.indexOf(j+jcolcnt)!=-1){
finalmatrix[j][j] = 0;
jcolcnt++;
//continue;
}
finalmatrix[i+icolcnt][j+jcolcnt] = result.getBasePairProb()[i][j];
finalexp[i+icolcnt][j+jcolcnt] = result.getExpectation()[i][j];
}
}
progress.setCurrentActivity("Folding finished.");
progress.setProgress(1.0);
result.finalmatrix = finalmatrix;
return result;
}
public static ResultBundle fold2(Progress progress, List<String> sequences, List<String> names, Tree tree,
Parameters param, List<ExtraData> extradata) throws InterruptedException, Exception{
//PPfoldResult ppfoldResult = new PPfoldResult();
AsynchronousJobExecutor executor = new AsynchronousJobExecutorThreadPool(nrprocessors);
progress.setProgress(0.0);
List<char[]> columns = new ArrayList<char[]>();
//each element represents a *column* of the alignment
List<char[]> fullcolumns = new ArrayList<char[]>();
//this will contain the FULL alignment (for outputting later)
int nrseq = sequences.size();
char[] thiscolumn = new char[nrseq];
char[] finalstructure = new char[sequences.get(0).length()];
float[] finalreliability = new float[sequences.get(0).length()];
float[][] finalmatrix = new float[sequences.get(0).length()][sequences.get(0).length()];
float[][] finalexp = new float[sequences.get(0).length()][sequences.get(0).length()];
List<Integer> leftoutcolumns = new ArrayList<Integer>();
if(!entropycalc||entropyString==null||(entropyString!=null&&entropyString.trim().equals(""))){
for(int i = 0; i < sequences.get(0).length(); i++){
//iterates column number
int gapscounter=0;
for(int j=0; j<nrseq; j++){
//iterates sequences
thiscolumn[j]=(char) sequences.get(j).charAt(i);
if(MatrixTools.isGap(thiscolumn[j])){gapscounter++;}
//System.out.println("Calculating column nr. " + j + " taking from seq " + j + " charAt " + i + " which is " + thiscolumn[j]);
}
boolean isoktoremove = true;
fullcolumns.add(thiscolumn.clone());
//May only remove a column if it is not associated with exp data.
isoktoremove = true;
if(!extradata.isEmpty()){
for(int k = 0; k<extradata.size(); k++){
if(!extradata.get(k).isEmpty(i)){
isoktoremove = false;
break;
}
}
}
if((float)gapscounter/nrseq >= 0.25 && isoktoremove){
finalstructure[i] = '.';
finalreliability[i] = 0;
leftoutcolumns.add(i);
//System.out.println("Leaving out column: " + i);
}
else{
columns.add(thiscolumn.clone());
}
}
}
else{
//If entropy is calculated after a specific sequence, then remove columns where there are gaps in
//other sequences
System.out.println("Removing gaps after sequence name: " + entropyString);
//Find the sequence:
int id = -1;
for(id = 0; id<names.size(); id++){
if(entropyString.trim().equals(names.get(id).trim())){
break;
}
}
if(id==-1){
throw new Exception("Sequence with name " + entropyString + " could not be found!");
}
//Sequences.get(id) is the one that shouldn't have gaps
for(int i = 0; i < sequences.get(0).length(); i++){
//iterates column number
for(int j=0; j<nrseq; j++){
//iterates sequences
thiscolumn[j]=(char) sequences.get(j).charAt(i);
}
fullcolumns.add(thiscolumn.clone());
//Remove column
if(MatrixTools.isGap(sequences.get(id).charAt(i))){
finalstructure[i] = '.';
finalreliability[i] = 0;
leftoutcolumns.add(i);
}
else{
//Keep column.
columns.add(thiscolumn.clone());
}
}
}
//Remove left out columns from Extra data as well
for(ExtraData extrad:extradata){
extrad.removeColumns(leftoutcolumns);
}
System.out.println("Total number of columns predicting structure on: " + columns.size());
//Convert character array columns to integer array columns
List<int[]> columns_int = new ArrayList<int[]>();
for (int i = 0; i < fullcolumns.size(); i++) {
columns_int.add(MatrixTools.convertColumn(fullcolumns.get(i)));
}
Progress activity = progress.getChildProgress(0.05);
if(tree==null){
try {
//System.out.println("Creating tree by neighbour joining... ");
tree = NeighbourJoining.generateTreeNJ(activity, sequences,columns_int, names, param);
//tree.print();
}catch (InterruptedException e) {
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
}
activity.setProgress(1.0);
System.out.println("Checking data...");
boolean isInputOK = FoldingProject.checkInput(tree, columns, names);
if(!isInputOK){
throw new IllegalArgumentException("There are problems with the input. Check that: " +
"\n - Alignment rows all have the same length. " +
"\n - The names of nodes in the tree match the names in the alignment. " +
"\n - There are no illegal characters in any of the sequences.");
}
System.out.println("Checking finished");
Progress activity2 = progress.getChildProgress(0.05);
if(optimizetree){
//System.out.println("Optimizing branch lengths...");
try {
tree.optimizeBranchLengths(activity2, columns_int,fullcolumns,names,param, iterlimit);
} catch (InterruptedException e) {
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
//if(!onlyCT){
//ExportTools.writeTree(outputdir, exportfilehandle, ".newick", tree);
//}
tree.print();
}
activity2.setProgress(1.0);
//Leave 2% for outputting
Progress activity3 = progress.getChildProgress(0.88);
ResultBundle result = null;
try{
result = FoldingProject.fold(activity3, phylodivisions,scfgdivisions, tree,
columns, names, param, executor, verbose,1,extradata,false,entropycalc);
}
catch(InterruptedException e){
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
activity3.setProgress(1.0);
//ExportTools.writeCTFormat(outputdir, exportfilehandle+"_RED", ".ct", columns, result.getStructure(), result.getReliability());
//ExportTools.writeStructureReliability(outputdir, exportfilehandle+"_RED", ".st", result.getStructure(), result.getReliability());
System.out.println("Structure prediction complete. Transforming to include left-out columns " +
"(total " + leftoutcolumns.size() + ")...");
progress.setCurrentActivity("Finalizing...");
int icolcnt = 0;
for(int i = 0; i<result.getBasePairProb().length; i++){
while(leftoutcolumns.indexOf(i+icolcnt)!=-1){
// System.out.println("i = " + i);
icolcnt++;
finalmatrix[i][i] = 0;
finalexp[i][i] = 0;
//continue;
}
//System.out.println("probmatrix: " + i + ", finalmatrix coord: " + (i+icolcnt));
int jcolcnt = 0;
for(int j = 0; j<result.getBasePairProb().length; j++){
//System.out.println("j = " + j);
while(leftoutcolumns.indexOf(j+jcolcnt)!=-1){
finalmatrix[j][j] = 0;
jcolcnt++;
//continue;
}
finalmatrix[i+icolcnt][j+jcolcnt] = result.getBasePairProb()[i][j];
finalexp[i+icolcnt][j+jcolcnt] = result.getExpectation()[i][j];
}
}
progress.setCurrentActivity("Folding finished.");
progress.setProgress(1.0);
result.finalmatrix = finalmatrix;
result.leftOutColumns = leftoutcolumns;
return result;
}
public static ResultBundle foldMatrix(Progress progress, List<String> sequences, List<String> names, double [][] inputMatrix,
Parameters param, List<ExtraData> extradata) throws InterruptedException, Exception{
//PPfoldResult ppfoldResult = new PPfoldResult();
AsynchronousJobExecutor executor = new AsynchronousJobExecutorThreadPool(nrprocessors);
progress.setProgress(0.0);
List<char[]> columns = new ArrayList<char[]>();
//each element represents a *column* of the alignment
List<char[]> fullcolumns = new ArrayList<char[]>();
//this will contain the FULL alignment (for outputting later)
int nrseq = sequences.size();
char[] thiscolumn = new char[nrseq];
char[] finalstructure = new char[sequences.get(0).length()];
float[] finalreliability = new float[sequences.get(0).length()];
float[][] finalmatrix = new float[sequences.get(0).length()][sequences.get(0).length()];
float[][] finalexp = new float[sequences.get(0).length()][sequences.get(0).length()];
List<Integer> leftoutcolumns = new ArrayList<Integer>(); // will always be empty in this method
if(!entropycalc||entropyString==null||(entropyString!=null&&entropyString.trim().equals(""))){
for(int i = 0; i < sequences.get(0).length(); i++){
//iterates column number
int gapscounter=0;
for(int j=0; j<nrseq; j++){
//iterates sequences
thiscolumn[j]=(char) sequences.get(j).charAt(i);
if(MatrixTools.isGap(thiscolumn[j])){gapscounter++;}
//System.out.println("Calculating column nr. " + j + " taking from seq " + j + " charAt " + i + " which is " + thiscolumn[j]);
}
boolean isoktoremove = true;
fullcolumns.add(thiscolumn.clone());
//May only remove a column if it is not associated with exp data.
isoktoremove = true;
if(!extradata.isEmpty()){
for(int k = 0; k<extradata.size(); k++){
if(!extradata.get(k).isEmpty(i)){
isoktoremove = false;
break;
}
}
}
if((float)gapscounter/nrseq >= 0.25 && isoktoremove){
finalstructure[i] = '.';
finalreliability[i] = 0;
//leftoutcolumns.add(i);
//System.out.println("Leaving out column: " + i);
}
else{
columns.add(thiscolumn.clone());
}
}
}
else{
//If entropy is calculated after a specific sequence, then remove columns where there are gaps in
//other sequences
System.out.println("Removing gaps after sequence name: " + entropyString);
//Find the sequence:
int id = -1;
for(id = 0; id<names.size(); id++){
if(entropyString.trim().equals(names.get(id).trim())){
break;
}
}
if(id==-1){
throw new Exception("Sequence with name " + entropyString + " could not be found!");
}
//Sequences.get(id) is the one that shouldn't have gaps
for(int i = 0; i < sequences.get(0).length(); i++){
//iterates column number
for(int j=0; j<nrseq; j++){
//iterates sequences
thiscolumn[j]=(char) sequences.get(j).charAt(i);
}
fullcolumns.add(thiscolumn.clone());
//Remove column
if(MatrixTools.isGap(sequences.get(id).charAt(i))){
finalstructure[i] = '.';
finalreliability[i] = 0;
//leftoutcolumns.add(i);
}
else{
//Keep column.
columns.add(thiscolumn.clone());
}
}
}
//Remove left out columns from Extra data as well
for(ExtraData extrad:extradata){
//extrad.removeColumns(leftoutcolumns);
}
System.out.println("Total number of columns predicting structure on: " + columns.size());
//Convert character array columns to integer array columns
List<int[]> columns_int = new ArrayList<int[]>();
for (int i = 0; i < fullcolumns.size(); i++) {
columns_int.add(MatrixTools.convertColumn(fullcolumns.get(i)));
}
/*Progress activity = progress.getChildProgress(0.05);
if(tree==null){
try {
//System.out.println("Creating tree by neighbour joining... ");
tree = NeighbourJoining.generateTreeNJ(activity, sequences,columns_int, names, param);
//tree.print();
}catch (InterruptedException e) {
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
}
activity.setProgress(1.0);
System.out.println("Checking data...");
boolean isInputOK = FoldingProject.checkInput(tree, columns, names);
if(!isInputOK){
throw new IllegalArgumentException("There are problems with the input. Check that: " +
"\n - Alignment rows all have the same length. " +
"\n - The names of nodes in the tree match the names in the alignment. " +
"\n - There are no illegal characters in any of the sequences.");
}
System.out.println("Checking finished");
Progress activity2 = progress.getChildProgress(0.05);
if(optimizetree){
//System.out.println("Optimizing branch lengths...");
try {
tree.optimizeBranchLengths(activity2, columns_int,fullcolumns,names,param, iterlimit);
} catch (InterruptedException e) {
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
//if(!onlyCT){
//ExportTools.writeTree(outputdir, exportfilehandle, ".newick", tree);
//}
tree.print();
}
activity2.setProgress(1.0);*/
//Leave 2% for outputting
Progress activity3 = progress.getChildProgress(0.88);
ResultBundle result = null;
try{
result = FoldingProject.foldMatrix(activity3, phylodivisions,scfgdivisions,
columns, names, inputMatrix, param, executor, verbose,1,extradata,false,entropycalc);
}
catch(InterruptedException e){
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
activity3.setProgress(1.0);
//ExportTools.writeCTFormat(outputdir, exportfilehandle+"_RED", ".ct", columns, result.getStructure(), result.getReliability());
//ExportTools.writeStructureReliability(outputdir, exportfilehandle+"_RED", ".st", result.getStructure(), result.getReliability());
/*
System.out.println("Structure prediction complete. Transforming to include left-out columns " +
"(total " + leftoutcolumns.size() + ")...");
*/
progress.setCurrentActivity("Finalizing...");
int icolcnt = 0;
for(int i = 0; i<result.getBasePairProb().length; i++){
while(leftoutcolumns.indexOf(i+icolcnt)!=-1){
// System.out.println("i = " + i);
icolcnt++;
finalmatrix[i][i] = 0;
finalexp[i][i] = 0;
//continue;
}
//System.out.println("probmatrix: " + i + ", finalmatrix coord: " + (i+icolcnt));
int jcolcnt = 0;
for(int j = 0; j<result.getBasePairProb().length; j++){
//System.out.println("j = " + j);
while(leftoutcolumns.indexOf(j+jcolcnt)!=-1){
finalmatrix[j][j] = 0;
jcolcnt++;
//continue;
}
finalmatrix[i+icolcnt][j+jcolcnt] = result.getBasePairProb()[i][j];
finalexp[i+icolcnt][j+jcolcnt] = result.getExpectation()[i][j];
}
}
progress.setCurrentActivity("Folding finished.");
progress.setProgress(1.0);
result.finalmatrix = finalmatrix;
return result;
}
public static ResultBundle foldFuzzyAlignment(Progress progress, FuzzyAlignment fuzzyAlignment, Tree tree,
Parameters param, List<ExtraData> extradata, boolean useExpectedFrequencies) throws InterruptedException, Exception{
fuzzyAlignment.useExpectedFrequencies = useExpectedFrequencies;
AsynchronousJobExecutor executor = new AsynchronousJobExecutorThreadPool(nrprocessors);
progress.setProgress(0.0);
List<char[]> columns = new ArrayList<char[]>();
List<FuzzyNucleotide[]> fuzzyColumns = new ArrayList<FuzzyNucleotide[]>();
//each element represents a *column* of the alignment
List<char[]> fullcolumns = new ArrayList<char[]>();
//this will contain the FULL alignment (for outputting later)
int nrseq = fuzzyAlignment.getNumSequences();
//FuzzyNucleotide[] thiscolumn = new FuzzyNucleotide[nrseq];
char[] thiscolumn = new char[nrseq];
FuzzyNucleotide[] thisFuzzyColumn = new FuzzyNucleotide[nrseq];
char[] finalstructure = new char[fuzzyAlignment.sequences.get(0).length()];
float[] finalreliability = new float[fuzzyAlignment.sequences.get(0).length()];
float[][] finalmatrix = new float[fuzzyAlignment.sequences.get(0).length()][fuzzyAlignment.sequences.get(0).length()];
float[][] finalexp = new float[fuzzyAlignment.sequences.get(0).length()][fuzzyAlignment.sequences.get(0).length()];
List<Integer> leftoutcolumns = new ArrayList<Integer>();
if(!entropycalc||entropyString==null||(entropyString!=null&&entropyString.trim().equals(""))){
for(int i = 0; i < fuzzyAlignment.length ; i++){
//iterates column number
int gapscounter=0;
for(int j=0; j<nrseq; j++){
//iterates sequences
thiscolumn[j]= fuzzyAlignment.sequences.get(j).charAt(i);
thisFuzzyColumn[j]= fuzzyAlignment.columns.get(i)[j].clone();
if(MatrixTools.isGap(thiscolumn[j])){gapscounter++;}
//System.out.println("Calculating column nr. " + j + " taking from seq " + j + " charAt " + i + " which is " + thiscolumn[j]);
}
boolean isoktoremove = true;
fullcolumns.add(thiscolumn.clone());
//mapping.add(i);
//May only remove a column if it is not associated with exp data.
isoktoremove = true;
if(!extradata.isEmpty()){
for(int k = 0; k<extradata.size(); k++){
if(!extradata.get(k).isEmpty(i)){
isoktoremove = false;
break;
}
}
}
if((float)gapscounter/nrseq >= 0.25 && isoktoremove){
finalstructure[i] = '.';
finalreliability[i] = 0;
leftoutcolumns.add(i);
//System.out.println("Leaving out column: " + i);
}
else{
columns.add(thiscolumn.clone());
fuzzyColumns.add(thisFuzzyColumn.clone());
}
}
}
else{
//If entropy is calculated after a specific sequence, then remove columns where there are gaps in
//other sequences
System.out.println("Removing gaps after sequence name: " + entropyString);
//Find the sequence:
int id = -1;
for(id = 0; id<fuzzyAlignment.names.size(); id++){
if(entropyString.trim().equals(fuzzyAlignment.names.get(id).trim())){
break;
}
}
if(id==-1){
throw new Exception("Sequence with name " + entropyString + " could not be found!");
}
//Sequences.get(id) is the one that shouldn't have gaps
for(int i = 0; i < fuzzyAlignment.length; i++){
//iterates column number
for(int j=0; j<nrseq; j++){
//iterates sequences
thiscolumn[j]=(char) fuzzyAlignment.sequences.get(j).charAt(i);
thisFuzzyColumn[j]= fuzzyAlignment.columns.get(i)[j].clone();
}
fullcolumns.add(thiscolumn.clone());
//Remove column
if(MatrixTools.isGap(fuzzyAlignment.sequences.get(id).charAt(i))){
finalstructure[i] = '.';
finalreliability[i] = 0;
leftoutcolumns.add(i);
}
else{
//Keep column.
columns.add(thiscolumn.clone());
fuzzyColumns.add(thisFuzzyColumn.clone());
}
}
}
//Remove left out columns from Extra data as well
for(ExtraData extrad:extradata){
extrad.removeColumns(leftoutcolumns);
}
System.out.println("Total number of columns predicting structure on: " + columns.size());
//Convert character array columns to integer array columns
List<int[]> columns_int = new ArrayList<int[]>();
for (int i = 0; i < fullcolumns.size(); i++) {
columns_int.add(MatrixTools.convertColumn(fullcolumns.get(i)));
}
Progress activity = progress.getChildProgress(0.05);
if(tree==null){
try {
//System.out.println("Creating tree by neighbour joining... ");
System.out.println("activity="+(activity == null));
System.out.println("columns_int="+(columns_int == null));
System.out.println("param="+(param == null));
tree = NeighbourJoining.generateTreeNJ(activity, fuzzyAlignment.sequences,columns_int, fuzzyAlignment.names, param);
//tree.print();
}catch (InterruptedException e) {
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
}
activity.setProgress(1.0);
System.out.println("Checking data...");
boolean isInputOK = FoldingProject.checkInput(tree, columns, fuzzyAlignment.names);
if(!isInputOK){
throw new IllegalArgumentException("There are problems with the input. Check that: " +
"\n - Alignment rows all have the same length. " +
"\n - The names of nodes in the tree match the names in the alignment. " +
"\n - There are no illegal characters in any of the sequences.");
}
System.out.println("Checking finished");
Progress activity2 = progress.getChildProgress(0.05);
if(optimizetree){
//System.out.println("Optimizing branch lengths...");
try {
tree.optimizeBranchLengths(activity2, columns_int,fullcolumns,fuzzyAlignment.names,param, iterlimit);
} catch (InterruptedException e) {
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
//if(!onlyCT){
//ExportTools.writeTree(outputdir, exportfilehandle, ".newick", tree);
//}
tree.print();
}
activity2.setProgress(1.0);
//Leave 2% for outputting
Progress activity3 = progress.getChildProgress(0.88);
ResultBundle result = null;
try{
ArrayList<Integer> mapping = new ArrayList<Integer>();
for(int i = 0 ; i < fuzzyAlignment.sequences.get(0).length() ; i++)
{
if(leftoutcolumns.indexOf(new Integer(i)) == -1)
{
mapping.add(i);
}
}
fuzzyAlignment.mapping = mapping;
//System.out.println(mapping);
//System.out.println(fuzzyColumns.size() + "\t"+columns.size());
//System.out.println(fuzzyColumns+ "\n"+columns);
result = FoldingProject.foldFuzzyAlignment(activity3, phylodivisions,scfgdivisions, tree,
fuzzyColumns, fuzzyAlignment, fuzzyAlignment.names, param, executor, verbose,1,extradata,false,entropycalc);
}
catch(InterruptedException e){
System.out.println("Process interrupted by user! Stopping...");
executor.shutDown();
while(!executor.isTerminated()){
Thread.sleep(100);
}
throw new InterruptedException();
}
activity3.setProgress(1.0);
//ExportTools.writeCTFormat(outputdir, exportfilehandle+"_RED", ".ct", columns, result.getStructure(), result.getReliability());
//ExportTools.writeStructureReliability(outputdir, exportfilehandle+"_RED", ".st", result.getStructure(), result.getReliability());
System.out.println("Structure prediction complete. Transforming to include left-out columns " +
"(total " + leftoutcolumns.size() + ")...");
progress.setCurrentActivity("Finalizing...");
int icolcnt = 0;
for(int i = 0; i<result.getBasePairProb().length; i++){
while(leftoutcolumns.indexOf(i+icolcnt)!=-1){
// System.out.println("i = " + i);
icolcnt++;
finalmatrix[i][i] = 0;
finalexp[i][i] = 0;
//continue;
}
//System.out.println("probmatrix: " + i + ", finalmatrix coord: " + (i+icolcnt));
int jcolcnt = 0;
for(int j = 0; j<result.getBasePairProb().length; j++){
//System.out.println("j = " + j);
while(leftoutcolumns.indexOf(j+jcolcnt)!=-1){
finalmatrix[j][j] = 0;
jcolcnt++;
//continue;
}
finalmatrix[i+icolcnt][j+jcolcnt] = result.getBasePairProb()[i][j];
finalexp[i+icolcnt][j+jcolcnt] = result.getExpectation()[i][j];
}
}
progress.setCurrentActivity("Folding finished.");
progress.setProgress(1.0);
result.finalmatrix = finalmatrix;
return result;
//System.out.println(finalmatrix[10][19]);
//System.out.println(result.getBasePairProb()[10][17]);
//could maybe implement a search-based solution using indexOf.
//Dunno which one is better/faster.
//This seems to work.
}
public boolean success() {
return this.success;
}
public static void setfoldingfinished(boolean set){
shouldstop = set;
}
public static void setProgressBar(PPfoldProgressBar progressin){
progress = progressin;
}
}