package statalign.postprocess.plugins; import java.awt.BorderLayout; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintStream; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; import javax.swing.Icon; import javax.swing.ImageIcon; import javax.swing.JPanel; import javax.swing.JScrollPane; import statalign.base.InputData; import statalign.base.Mcmc; import statalign.base.State; import statalign.distance.Distance; import statalign.postprocess.Postprocess; import statalign.postprocess.PostprocessManager; import statalign.postprocess.gui.PPFoldGUI; import statalign.postprocess.plugins.benchmarks.Benchmarks; import statalign.postprocess.plugins.benchmarks.Dataset; import statalign.postprocess.utils.Mapping; import statalign.postprocess.utils.RNAFoldingTools; import statalign.postprocess.utils.RNAalifold; import com.ppfold.algo.AlignmentData; import com.ppfold.algo.FuzzyAlignment; import com.ppfold.algo.NullProgress; import com.ppfold.algo.Parameters; import com.ppfold.algo.Progress; import com.ppfold.algo.ResultBundle; import com.ppfold.algo.Tree; import com.ppfold.algo.extradata.ExtraData; import com.ppfold.main.Alignment; import com.ppfold.main.AlignmentReader; import com.ppfold.main.NewickReader; import com.ppfold.main.PPfoldMain; public class PPFold extends statalign.postprocess.Postprocess { public static ArrayList < ArrayList< ArrayList<String> > > readAllSamples(String path){ try { ArrayList < ArrayList< ArrayList<String> > > allTests = new ArrayList<ArrayList<ArrayList<String>>>(); File dir = new File(path); File[] sortedDir = dir.listFiles(); Arrays.sort(sortedDir); ArrayList<String> Alignment = new ArrayList<String>(); ArrayList< ArrayList<String> > Samples= new ArrayList<ArrayList<String>>(); for(File i : sortedDir){ Samples = new ArrayList<ArrayList<String>>(); BufferedReader br = new BufferedReader(new FileReader (i.getAbsoluteFile())); String line = br.readLine(); do{ if(line.compareTo("%") == 0){ Samples.add(Alignment); Alignment = new ArrayList<String>(); }else { Alignment.add(line); } }while( (line = br.readLine()) != null); allTests.add(Samples); br.close(); } return allTests; } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null; } // variables from ppfoldmain static private Progress progress = NullProgress.INSTANCE; public String title; JPanel pan = new JPanel(new BorderLayout()); PPFoldGUI gui; // private boolean sampling = true; CurrentAlignment curAlig; MpdAlignment mpdAlignment; ColumnNetwork network; Column firstVector, lastVector; int sizeOfAlignments; int[] firstDescriptor; String t[][]; String[] sequences; String[] viterbialignment; int d; int seqNo = 0; static String refSeqName = ""; String refSeq; public String refSeqGapped; double[][] summedBasePairProbMatrix; double[] summedSingleBaseProb; public float[][] probMatrix; double[][] summedBasePairProbRNAalifold; int noSamples; double [][] weightedBasePairProb; double beta = 10; double weightedSum = 0; double firstLikelihood = 0; double posteriorProbabilityAvg = 0; ArrayList<String> tempAlignment; ArrayList<Double> logLikelihood; public double entropyObs; public double entropyExp; public double entropySample; RNAFoldingTools rnaTools = new RNAFoldingTools(); ArrayList<Double> distanceList; public String outDir; PrintStream logFile; String rnaAlifoldParameters = ""; boolean samplingAndAveragingPPfold = false; boolean consensusEvolutionPrediction = true; boolean samplingAndAveragingRNAalifold = false; boolean fuzzyFolding = false; boolean experimental = false; public PPFold() { screenable = true; outputable = true; // postprocessable = true; // TODO might need to change to false // postprocessWrite = true;; // TODO might need to change to false rnaAssociated = true; Entropy.allowed = true; selected = false; } @Override public String getTabName() { return "Base-pairing matrix"; } @Override public Icon getIcon() { return new ImageIcon(ClassLoader.getSystemResource("icons/MPD.gif")); } @Override public JPanel getJPanel() { return pan; } @Override public String getTip() { return "Base-pairing matrix of the current consensus structure given by PPFold"; } @Override public double getTabOrder() { return 8.0d; } @Override public void setSampling(boolean enabled) { sampling = enabled; } @Override public String[] getDependencies() { return new String[] { "statalign.postprocess.plugins.CurrentAlignment", "statalign.postprocess.plugins.MpdAlignment"}; } @Override public void refToDependencies(Postprocess[] plugins) { curAlig = (CurrentAlignment) plugins[0]; mpdAlignment = (MpdAlignment) plugins[1]; } static Comparator<String[]> compStringArr = new Comparator<String[]>() { @Override public int compare(String[] a1, String[] a2) { return a1[0].compareTo(a2[0]); } }; List<ExtraData> extradata = new ArrayList<ExtraData>(); Parameters param = null; public void selectReferenceSequence(InputData input) { int maxLength = 0; refSeq = input.seqs.getSequence(0).replaceAll("-", ""); refSeqName = input.seqs.getSeqName(0); refSeqGapped = input.seqs.getSequence(0); maxLength = refSeq.length(); for (int i = 0; i < input.seqs.size(); i++) { String seq = input.seqs.getSequence(i).replaceAll("-", ""); if (seq.length() > maxLength) { maxLength = seq.length(); refSeq = seq; refSeqName = input.seqs.getSeqName(i); refSeqGapped = input.seqs.getSequence(i); seqNo = i; } } d = maxLength; } @Override public void beforeFirstSample(InputData input) { outDir = input.outputPath; title = input.title; if(input.pars.seed != 1) { title = input.title + "_seed"+input.pars.seed; } try { logFile = new PrintStream(new File(outDir+"/"+title+".ppfold.log")); } catch(FileNotFoundException e) { e.printStackTrace(); } selectReferenceSequence(input); if(PostprocessManager.pluginParameters != null) { // System.out.println("Parameters"); // pluginParameters.print(); String ppfoldParameter = PostprocessManager.pluginParameters.getParameter("ppfold"); String rnaAlifoldParameter = PostprocessManager.pluginParameters.getParameter("rnaalifold"); String fuzzyParameter = PostprocessManager.pluginParameters.getParameter("fuzzy"); samplingAndAveragingPPfold = ppfoldParameter != null; samplingAndAveragingRNAalifold = rnaAlifoldParameter != null; fuzzyFolding = false; if(samplingAndAveragingRNAalifold) { System.out.println("Using RNAalifold with parameters: \"" + rnaAlifoldParameter+"\""); String param = rnaAlifoldParameter.replaceAll("^\"", "").replaceAll("\"$", ""); String [] split = param.split("(\\s)+", 2); if(split.length > 0) { RNAalifold.executable = split[0]; if(split.length > 1) { rnaAlifoldParameters = split[1]; } } } if(PostprocessManager.pluginParameters.getParameter("experimental") != null) { System.out.println("Experimental RNA mode enabled."); // new File(outDir).mkdirs(); samplingAndAveragingPPfold = true; samplingAndAveragingRNAalifold = true; // TODO SHOULD CHANGE BACK TO TRUE fuzzyFolding = true; experimental = true; } if(samplingAndAveragingRNAalifold && !RNAalifold.checkRNAalifold()) { samplingAndAveragingRNAalifold = false; System.err.println("Disabling RNAalifold. Could not launch the executable, check that you have specified it correctly and have the latest version."); } } if(show) { pan.removeAll(); JScrollPane scroll = new JScrollPane(); scroll.getViewport().add(gui = new PPFoldGUI(title, scroll, this)); if(gui.getPreferredSize().getHeight() > gui.getHeight()) { scroll.createVerticalScrollBar(); } if(gui.getPreferredSize().getWidth() > gui.getWidth()) { scroll.createHorizontalScrollBar(); } pan.add(scroll, BorderLayout.CENTER); if(pan.getParent() != null) { pan.getParent().validate(); } gui.changeDimension(d); } sizeOfAlignments = (mcmc.tree.vertex.length + 1) / 2; noSamples = 0; t = new String[sizeOfAlignments][]; sequences = null; viterbialignment = new String[sizeOfAlignments]; network = new ColumnNetwork(); firstDescriptor = new int[sizeOfAlignments]; Arrays.fill(firstDescriptor, -1); firstVector = network.add(firstDescriptor); lastVector = null; viterbialignment = new String[sizeOfAlignments]; logLikelihood = new ArrayList<Double>(); // read in PPfold parameter file try { BufferedReader paramFileReader = null; if(param == null) { //File file = new File); paramFileReader = new BufferedReader(new InputStreamReader(ClassLoader.getSystemResourceAsStream("data/ppfold-matrices.dat"))); param = Parameters.readParam(paramFileReader); } } catch (IOException e) { e.printStackTrace(); } // initialise new dataset dataset = new Dataset(); String [] [] inputAlignment = new String[input.seqs.size()][2]; for(int k = 0 ; k < inputAlignment.length ; k++) { inputAlignment[k][0] = input.seqs.getSeqName(k); inputAlignment[k][1] = input.seqs.getSequence(k); } Arrays.sort(inputAlignment, compStringArr); dataset.inputAlignment = new AlignmentData(); for(int i = 0 ; i < inputAlignment.length ; i++) { dataset.inputAlignment.names.add(inputAlignment[i][0]); dataset.inputAlignment.sequences.add(inputAlignment[i][1]); } // // write sample file // try // { // boolean append = false; // BufferedWriter buffer = new BufferedWriter(new FileWriter(new File(outDir, title+".samples"), append)); // AlignmentData referenceAlignment = new AlignmentData(); // referenceAlignment.sequences = dataset.inputAlignment.sequences; // referenceAlignment.names = dataset.inputAlignment.names; // buffer.write("%reference\n"); // buffer.write(referenceAlignment.toString()); // buffer.close(); // } // catch(IOException ex) // { // ex.printStackTrace(); // } if(experimental) { // perform calculation on reference sample try { ResultBundle refResult = PPfoldMain.fold(progress, input.seqs.getSequences(), input.seqs.getSeqnames(), null, param, extradata); int [] refPairedSites = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(refResult.getFinalMatrix()); dataset.resultBundlePPfold = refResult.getSmallBundle(); dataset.pairedSitesPPfoldProjected = Benchmarks.projectPairedSites(RNAFoldingTools.getSequenceByName(refSeqName, input.seqs.getSequences(), input.seqs.getSeqnames()), refPairedSites); dataset.rnaAlifoldRef = RNAalifold.fold(input.seqs.getSequences(), input.seqs.getSeqnames(), rnaAlifoldParameters).getSmallResult(); dataset.pairedSitesRNAalifoldRefProjected = Benchmarks.projectPairedSites(RNAFoldingTools.getSequenceByName(refSeqName, input.seqs.getSequences(), input.seqs.getSeqnames()), dataset.rnaAlifoldRef.pairedSites); //dataset.pairedSitesRNAalifoldRefProjected = Benchmarks.projectPairedSites(refSeq, dataset.rnaAlifoldRef.pairedSites); //RNAFoldingTools.writeToFile(new File(outDir+title+"_ref_structure.txt"), ""+refResult.getPPfoldReliability()+"\n"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSites)+"\n", false); } catch (InterruptedException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } } alignments = new ArrayList<AlignmentData>(); } public double calculateMPD(ArrayList<String> mpdSequences, ArrayList<String> mpdNames) { try { List<String> sequences = mpdSequences; List<String> seqNames = mpdNames; String refSeq = sequences.get(0).replaceAll("-", ""); String refSeqGapped = sequences.get(0); int maxLength = refSeq.length(); for (int i = 0; i < sequences.size(); i++) { String seq = sequences.get(i).replaceAll("-", ""); if (seq.length() > maxLength) { maxLength = seq.length(); refSeq = seq; refSeqName = seqNames.get(i); refSeqGapped = sequences.get(i); } } // // write sample file // try // { // boolean append = true; // BufferedWriter buffer = new BufferedWriter(new FileWriter(new File(outDir, title+".samples"), append)); // AlignmentData referenceAlignment = new AlignmentData(); // referenceAlignment.sequences = mpdSequences; // referenceAlignment.names = mpdNames; // buffer.write("%mpd\n"); // buffer.write(referenceAlignment.toString()); // buffer.close(); // } // catch(IOException ex) // { // ex.printStackTrace(); // } List<ExtraData> extradata = new ArrayList<ExtraData>(); ResultBundle mpdResult = PPfoldMain.fold(progress, mpdSequences, mpdNames, null, param, extradata); float [][] mpdBasePairProb = mpdResult.finalmatrix; int [] mpdPairedSites = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(mpdBasePairProb); //double ppfoldReliability2 = RNAFoldingTools.calculatePPfoldReliabilityScore(mpdPairedSites, RNAFoldingTools.getDoubleMatrix(mpdBasePairProb)); double ppfoldReliability = mpdResult.getPPfoldReliability(); int [] projectedPairedSites = Benchmarks.projectPairedSites(refSeqGapped, mpdPairedSites); dataset.resultBundleMPD = mpdResult.getSmallBundle(); dataset.pairedSitesMPD = projectedPairedSites; dataset.ppfoldReliabilityMPD = ppfoldReliability; dataset.pairsOnlyReliabilityMPD = mpdResult.getPairsOnlyReliability(); dataset.pairsOnlyMPDPosteriorWeighted = RNAFoldingTools.calculatePairsOnlyReliabilityScore(mpdPairedSites, RNAFoldingTools.getDoubleMatrix(mpdResult.finalmatrix), dataset.posteriors); if(samplingAndAveragingRNAalifold) { RNAalifoldResult rnaAlifoldMPDResult = RNAalifold.fold(mpdSequences, mpdNames, rnaAlifoldParameters); dataset.rnaAlifoldMPD = rnaAlifoldMPDResult.getSmallResult(); dataset.pairedSitesRNAalifoldMPDProjected = Benchmarks.projectPairedSites(refSeqGapped, rnaAlifoldMPDResult.pairedSites); } return ppfoldReliability; } catch(Exception ex) { ex.printStackTrace(); } return -1; } Dataset dataset = null; ArrayList<AlignmentData> alignments = new ArrayList<AlignmentData>(); public AlignmentData al; //String p2 = ""; //double finalEntropyExpReliabilityScore = -1; //double finalEntropyObsReliabilityScore = -1; double [][] averagedPhyloProbs; double [][] summedPhyloProbs; double [][] countPhyloProbs; double [] columnCounts; double n; ArrayList<Integer> leftOutColumns = new ArrayList<Integer>(); public static double [][] reconstituteMatrix(double [][] matrix, List<Integer> leftOutColumns) { double [] [] ret = new double[matrix.length+leftOutColumns.size()][matrix.length+leftOutColumns.size()]; int x = 0; int y = 0; for(int i = 0 ; i < ret.length ; i++) { if(!leftOutColumns.contains(i)) { y = 0; for(int j = 0 ; j < ret[0].length ; j++) { if(!leftOutColumns.contains(j)) { ret[i][j] = matrix[x][y]; y++; } } x++; } } return ret; } public static double [][] incrementCountMatrix(double [][] matrix, List<Integer> leftOutColumns) { for(int i = 0 ; i < matrix.length ; i++) { if(!leftOutColumns.contains(i)) { for(int j = 0 ; j < matrix.length ; j++) { if(!leftOutColumns.contains(j)) { matrix[i][j]++; } } } } return matrix; } public static double [][] leaveOutColumns(double [][] matrix, List<Integer> leftOutColumns) { double [] [] ret = new double[matrix.length-leftOutColumns.size()][matrix[0].length-leftOutColumns.size()]; System.out.println("LOC"+matrix.length+"\t"+ret.length+"\t"+leftOutColumns.size()); int x = 0; int y = 0; for(int i = 0 ; i < matrix.length ; i++) { if(!leftOutColumns.contains(i)) { y = 0; for(int j = 0 ; j < matrix[0].length ; j++) { if(!leftOutColumns.contains(j)) { ret[x][y] = matrix[i][j]; y++; } } x++; } } return ret; } @Override public void newSample(State state, int no, int total) { //System.out.println(t.length+" "+curAlig.leafNames.length+" "+curAlig.leafAlignment.length); //System.out.println(curAlig.leafNames); for (int i = 0; i < t.length; i++) { //System.out.println(i+" "+curAlig.leafNames[i]); String unpaddedName = curAlig.leafNames[i].replace(" ",""); t[i] = new String[]{unpaddedName,curAlig.leafAlignment[i]}; } Arrays.sort(t, compStringArr); if(experimental) { if(no == 0) { dataset.title = title; dataset.randomSeed = mcmc.mcmcpars.seed; dataset.burnIn = mcmc.mcmcpars.burnIn; dataset.mcmcSteps = mcmc.mcmcpars.cycles; dataset.samplingRate = mcmc.mcmcpars.sampRate; } } if (sequences == null) { sequences = new String[sizeOfAlignments]; int len = t[0][1].length(); for (int i = 0; i < sizeOfAlignments; i++) { sequences[i] = ""; for (int j = 0; j < len; j++) { if (t[i][1].charAt(j) != '-') { sequences[i] += t[i][1].charAt(j); } } } } if(no == 0) { if(samplingAndAveragingPPfold) { summedBasePairProbMatrix = new double[d][d]; weightedBasePairProb = new double[d][d]; summedSingleBaseProb = new double[d]; } if(samplingAndAveragingRNAalifold) { summedBasePairProbRNAalifold = new double[d][d]; } } boolean cont = true; if (cont) { /*// initialise matrices if (no == 0) { weightedBasePairProb = new double[d][d]; for (int i = 0; i < d; ++i) { for (int j = 0; j < d; ++j) { summedBasePairProbMatrix[i][j] = 0; weightedBasePairProb[i][j] = 0; summedBasePairProbRNAalifold[i][j] = 0; } } summedSingleBaseProb = new double[d]; }*/ try { List<String> lines = new ArrayList<String>(); for (int i = 0; i < sequences.length; ++i) { lines.add(">" + t[i][0].trim()); lines.add(t[i][1]); } Alignment align = AlignmentReader.readAlignmentFromStringList(lines); Tree tree = getPPfoldTree(mcmc); if(samplingAndAveragingPPfold) { System.setOut(logFile); ResultBundle sampleResult = PPfoldMain.fold2(progress, align.getSequences(), align.getNames(), tree, param, extradata); logFile.flush(); System.setOut(System.out); entropySample = sampleResult.entropyVal; float[][] basePairProb = sampleResult.finalmatrix; float[] singleBaseProb = new float[basePairProb.length]; for (int x = 0; x < basePairProb.length; x++) { singleBaseProb[x] = 1; for (int y = 0; y < basePairProb[0].length; y++) { singleBaseProb[x] -= basePairProb[x][y]; } } ArrayList<String> sequences = new ArrayList<String>(); for (int k = 0; k < t.length; k++) { sequences.add(t[k][1]); } float[][] projectSample = Mapping.projectMatrix(PPFold.getSequenceByName(t, refSeqName), basePairProb, '-'); float[] projectSingleBaseProb = Mapping.projectarray(PPFold.getSequenceByName(t, refSeqName),singleBaseProb, '-'); // normalise projected matrix for(int x = 0 ; x < projectSample.length ; x++) { double rowMatrSum = 0; for(int y = 0 ; y < projectSample[0].length ; y++) { rowMatrSum += projectSample[x][y]; } double factor = rowMatrSum + projectSingleBaseProb[x]; factor = 1; for(int y = 0 ; y < projectSample[0].length ; y++) { projectSample[x][y] = (float)(projectSample[x][y] / factor); } projectSingleBaseProb[x] /= factor; } double alignmentLogLikelihood = state.logLike; probMatrix = new float[d][d]; //float [][] rnaAlifoldProbMatrix = new float[d][d]; double weight = Math.pow(firstLikelihood / alignmentLogLikelihood, beta); float [] singleMatrix = new float[d]; for (int i = 0; i < d; ++i) { summedSingleBaseProb[i] += projectSingleBaseProb[i]; singleMatrix[i] = (float)summedSingleBaseProb[i] / (noSamples+1); for (int j = 0; j < d; ++j) { summedBasePairProbMatrix[i][j] += projectSample[i][j]; probMatrix[i][j] = (float)(summedBasePairProbMatrix[i][j]/(noSamples+1)); weightedBasePairProb[i][j] += projectSample[i][j]*weight; } } weightedSum += weight; if(consensusEvolutionPrediction) { if(no == 0) { //RNAFoldingTools.writeToFile(new File(title+"_entropy.txt"),"",false); summedPhyloProbs = new double[d][d]; countPhyloProbs = new double[d][d]; columnCounts = new double[d]; n = 0; } //RNAFoldingTools.writeToFile(new File(title+"_dimensions.txt"), sampleResult.phyloProbs.length+"\t"+sampleResult.phyloProbs[0].length+"\t"+PPFold.getSequenceByName(t, refSeqName).length(), true); //RNAFoldingTools.writeMatrix(PPFold.reconstituteMatrix(sampleResult.phyloProbs, sampleResult.leftOutColumns), new File(title+"_phylo_recon.bp")); //double [] [] phyloProbs = PPFold.reconstituteMatrix(sampleResult.phyloProbs, sampleResult.leftOutColumns); double [][] phyloProbs = Mapping.projectMatrix(PPFold.getSequenceByName(t, refSeqName),PPFold.reconstituteMatrix(sampleResult.phyloProbs, sampleResult.leftOutColumns), '-'); //ArrayList<Integer> projectedLeftOutColumns = new ArrayList<Integer>(); //projectedLeftOutColumns.addAll(sampleResult.leftOutColumns); String ref = PPFold.getSequenceByName(t, refSeqName); int [] columns = Mapping.getProjectionIndices(ref, '-'); /*int [] columns = new int[ref.length()]; Arrays.fill(columns, -1); int x = 0; for(int i = 0 ; i < ref.length() ; i++) { if(ref.charAt(i) == '-') { } else { columns[i] = x; x++; } }*/ ArrayList<Integer> projectedLeftOutColumns = new ArrayList<Integer>(); for(int i = 0 ; i < sampleResult.leftOutColumns.size() ; i++) { int y = sampleResult.leftOutColumns.get(i); if(columns[y] != -1) { projectedLeftOutColumns.add(columns[y]); } } /* for(int i = 0 ; i < removedIndices.size() ; i++) { projectedLeftOutColumns.remove(removedIndices.get(i)); } Collections.sort(removedIndices); for(int i = 0 ; i < removedIndices.size() ; i++) { for(int j = 0 ; j < projectedLeftOutColumns.size() ; j++) { if(removedIndices.get(i) <= projectedLeftOutColumns.get(j)) { projectedLeftOutColumns.set(j, projectedLeftOutColumns.get(j)-1); } } }*/ PPFold.incrementCountMatrix(countPhyloProbs, projectedLeftOutColumns); for(int i = 0 ; i < summedPhyloProbs.length ; i++) { for(int j = 0 ; j < summedPhyloProbs[0].length ; j++) { summedPhyloProbs[i][j] += phyloProbs[i][j]; } } //RNAFoldingTools.writeMatrix(countPhyloProbs, new File(title+"_phylo_count.bp")); //RNAFoldingTools.writeMatrix(summedPhyloProbs, new File(title+"_phylo_summed.bp")); for(int i = 0 ; i < columnCounts.length ; i++) { if(!projectedLeftOutColumns.contains(i)) { columnCounts[i]++; } } n++; averagedPhyloProbs = new double[summedPhyloProbs.length][summedPhyloProbs[0].length]; for(int i = 0 ; i < summedPhyloProbs.length ; i++) { for(int j = 0 ; j < summedPhyloProbs[0].length ; j++) { //double divider = (columnCounts[i]+columnCounts[j])/2; double divider = countPhyloProbs[i][j]; if(divider != 0) { //averagedPhyloProbs[i][j] = summedPhyloProbs[i][j]/n; averagedPhyloProbs[i][j] = summedPhyloProbs[i][j]/divider; } } } if(no < 25) { leftOutColumns.clear(); for(int i = 0 ; i < columnCounts.length ; i++) { if(columnCounts[i]/n <= 0.25) { leftOutColumns.add(i); } } } if(no % 5 == 0) { performConsensusEvolutionPrediction(); } } Structure.updateBasePairMatrix(probMatrix); Structure.updateSingleMatrix(singleMatrix); PPfoldMain.setfoldingfinished(true); if(gui != null) { gui.changeDimension(d*PPFoldGUI.OFFSET); gui.setMatrix(probMatrix); gui.repaint(); } if(experimental) { dataset.sampledStructures.add(sampleResult.getSmallBundle()); int [] samplePairedSitesProjected = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(projectSample); dataset.pairedSitesProjectedSamples.add(samplePairedSitesProjected); } } if(samplingAndAveragingRNAalifold) { RNAalifoldResult rnaAlifoldSampleResult = RNAalifold.fold(align.getSequences(), align.getNames(), rnaAlifoldParameters); double [][] rnaAlifoldMatrixSample = rnaAlifoldSampleResult.matrix; ArrayList<String> sequences = new ArrayList<String>(); for (int k = 0; k < t.length; k++) { sequences.add(t[k][1]); } float[][] rnaAlifoldProjectedSample = Mapping.projectMatrix(PPFold.getSequenceByName(t, refSeqName), RNAFoldingTools.getFloatMatrix(rnaAlifoldMatrixSample), '-'); for (int i = 0; i < d; ++i) { for (int j = 0; j < d; ++j) { summedBasePairProbRNAalifold[i][j] += rnaAlifoldProjectedSample[i][j]; } } //System.out.println("RNAalifold over here " + samplingAndAveragingPPfold ); if(!samplingAndAveragingPPfold) // if ppfold not running, display RNAalifold on the GUI { //System.out.println("HERE"); probMatrix = new float[d][d]; double [] summedRNAalifoldSingleBaseProb = RNAFoldingTools.getSingleBaseProb(summedBasePairProbRNAalifold); float [] singleMatrix = new float[d]; for (int i = 0; i < d; ++i) { singleMatrix[i] = (float)summedRNAalifoldSingleBaseProb[i] / (noSamples+1); for (int j = 0; j < d; ++j) { probMatrix[i][j] = (float)(summedBasePairProbRNAalifold[i][j]/(noSamples+1)); } } Structure.updateBasePairMatrix(probMatrix); Structure.updateSingleMatrix(singleMatrix); PPfoldMain.setfoldingfinished(true); if(gui != null) { gui.changeDimension(d*PPFoldGUI.OFFSET); gui.setMatrix(probMatrix); gui.repaint(); } } if(experimental) { dataset.pairedSitesProjectedRnaAlifoldSamples.add(rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(rnaAlifoldProjectedSample)); } } } catch(IOException ex) { ex.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } noSamples += 1; } // save alignment sample information if(no == 0) { AlignmentData al = new AlignmentData(); for(int k = 0 ; k < t.length ; k++) { al.sequences.add(t[k][1]); al.names.add(t[k][0]); } alignments.add(al); /* appendAlignment("reference", inputAlignment, new File(outDir+mpdAlignment.input.title+".samples"), false); appendAlignment(no+"", t, new File(outDir+mpdAlignment.input.title+".samples"), true); RNAFoldingTools.writeToFile(new File(outDir +title+"_Distance.txt"),"BurnIn "+new Integer(mcmc.mcmcpars.burnIn).toString() + "\t" + "cycles "+new Integer(mcmc.mcmcpars.cycles).toString() + "\t" + "sampRate "+new Integer(mcmc.mcmcpars.sampRate).toString() + "\t" + "Filename "+ mpdAlignment.input.title + "\t", false);*/ tempAlignment = new ArrayList<String>(); for(int k = 0 ; k < t.length ; k++) { tempAlignment.add(t[k][1]); } //& appendAlignment("reference", inputAlignment, new File(outDir+title+".samples"), false); //& appendAlignment(no+"", t, new File(outDir+title+".samples"), true); //& RNAFoldingTools.writeToFile(new File(outDir+title+"_entropy_fuzzy_obs.txt"),"", false); //& RNAFoldingTools.writeToFile(new File(outDir+title+"_entropy_fuzzy_exp.txt"),"", false); //& RNAFoldingTools.writeToFile(new File(outDir+title+"_entropy_samples.txt"),"", false); //& RNAFoldingTools.writeToFile(new File(outDir+title+"_entropy_mpd.txt"),"", false); } if(experimental) { dataset.logLikelihoods.add(mcmc.mcmcStep.newLogLike); } al = new AlignmentData(); for(int k = 0 ; k < t.length ; k++) { al.sequences.add(t[k][1]); al.names.add(t[k][0]); } alignments.add(al); // if(experimental) // { // // write sample file // try // { // boolean append = true; // BufferedWriter buffer = new BufferedWriter(new FileWriter(new File(outDir, title+".samples"), append)); // buffer.write("%"+no+"\n"); // buffer.write(al.toString()); // buffer.close(); // } // catch(IOException ex) // { // ex.printStackTrace(); // } // } if(fuzzyFolding) { try { FuzzyAlignment fuzzyAlignment2 = FuzzyAlignment.getFuzzyAlignmentAndProject(alignments, refSeqName); //ResultBundle fuzzyResultExp2 = PPfoldMain.foldFuzzyAlignment(progress, fuzzyAlignment2, null, param, extradata, true); ResultBundle fuzzyResultObs2 = PPfoldMain.foldFuzzyAlignment(progress, fuzzyAlignment2, null, param, extradata, false); //entropyObs = fuzzyResultObs2.entropyVal; //entropyExp = fuzzyResultExp2.entropyVal; /*if(!samplingAndAveragingPPfold) // if ppfold not running, display fuzzy folding on the GUI { }*/ } catch(Exception ex) { ex.printStackTrace(); } if(experimental) { double dist = Distance.AMA(tempAlignment, al.sequences); //RNAFoldingTools.writeToFile(new File(outDir +title+"_Distance.txt"),new Double(dist).toString(), true); FuzzyAlignment fuzzyAlignment = FuzzyAlignment.getFuzzyAlignmentAndProject(alignments, refSeqName); List<ExtraData> extradata = new ArrayList<ExtraData>(); try { Tree tree = null; //Tree tree = getPPfoldTree(mcmc); //getPPfoldTree(mcmc).print(); ResultBundle fuzzyResultExp = PPfoldMain.foldFuzzyAlignment(progress, fuzzyAlignment, tree, param, extradata, true); ResultBundle fuzzyResultObs = PPfoldMain.foldFuzzyAlignment(progress, fuzzyAlignment, tree, param, extradata, false); double finalEntropyExpReliabilityScore = fuzzyResultExp.getPPfoldReliability(); double finalEntropyObsReliabilityScore = fuzzyResultObs.getPPfoldReliability(); int [] fuzzyPairedSitesExp = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(fuzzyResultExp.finalmatrix); int [] fuzzyPairedSitesObs = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(fuzzyResultObs.finalmatrix); dataset.resultBundleEntropyExp = fuzzyResultExp.getSmallBundle(); dataset.resultBundleEntropyObs = fuzzyResultObs.getSmallBundle(); dataset.pairedSitesEntropyExp = fuzzyPairedSitesExp; dataset.pairedSitesEntropyObs = fuzzyPairedSitesObs; dataset.cumulativeFuzzyAlignment.add(fuzzyAlignment); dataset.sampledAlignments.add(al); dataset.cumulativeFuzzyExpResults.add(fuzzyResultExp.getSmallBundle()); dataset.cumulativeFuzzyObsResults.add(fuzzyResultObs.getSmallBundle()); } catch (InterruptedException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } } } } int stepCounter = 0; public ResultBundle performConsensusEvolutionPrediction() { ResultBundle consensusEvolutionResult = null; try { AlignmentData input = new AlignmentData(); for(int k = 0 ; k < t.length ; k++) { input.sequences.add(t[k][1]); input.names.add(t[k][0]); } AlignmentData projectedAlignment = FuzzyAlignment.projectAlignment(input.sequences, input.names, refSeqName); averagedPhyloProbs = leaveOutColumns(averagedPhyloProbs, leftOutColumns); //RNAFoldingTools.writeMatrix(averagedPhyloProbs, new File(outDir+"/"+title+"_phylo.bp")); //ResultBundle matrixResult = PPfoldMain.foldMatrix(progress, input.sequences, input.names, sampleResult.phyloProbs, param, extradata); //ResultBundle matrixResult = PPfoldMain.foldMatrix(progress, projectedAlignment.sequences, projectedAlignment.names, sampleResult.phyloProbs, param, extradata); consensusEvolutionResult = PPfoldMain.foldMatrix(progress, projectedAlignment.sequences, projectedAlignment.names, averagedPhyloProbs, param, extradata); //ResultBundle matrixResult = PPfoldMain.foldMatrix(progress, projectedAlignment.sequences, projectedAlignment.names, RNAFoldingTools.getDoubleMatrix(probMatrix), param, extradata); dataset.matrixFolds.add(consensusEvolutionResult.getSmallBundle()); Collections.sort(leftOutColumns); StringBuffer structure = new StringBuffer(""); structure.append(consensusEvolutionResult.getStructure()); for(int i = 0 ; i < leftOutColumns.size() ; i++) { structure.insert(leftOutColumns.get(i).intValue(), '.'); } entropyObs = consensusEvolutionResult.entropyVal; dataset.pairedSitesMatrix = RNAFoldingTools.getPairedSitesFromDotBracketString(structure.toString()); dataset.ppfoldReliabilityScoreConsensusEvol = consensusEvolutionResult.getPPfoldReliability(); dataset.pairsOnlyReliabilityScoreConsensusEvol = consensusEvolutionResult.getPairsOnlyReliability(); } catch(Exception ex) { ex.printStackTrace(); } return consensusEvolutionResult; } public void computeFuzzyAlignment() { FuzzyAlignment fuzzyAlignment = FuzzyAlignment.getFuzzyAlignmentAndProject(alignments, refSeqName); System.out.println("Fuzzy alignment size"+fuzzyAlignment.columns.size()); List<ExtraData> extradata = new ArrayList<ExtraData>(); try { Tree tree = null; String name; ResultBundle fuzzyResultObs = PPfoldMain.foldFuzzyAlignment(progress, fuzzyAlignment, tree, param, extradata, false); //System.out.println("Fuzzy matrix size"+fuzzyResultObs.finalmatrix[0].length); int [] fuzzyPairedSitesObs = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(fuzzyResultObs.finalmatrix); char [] structure = fuzzyResultObs.getStructure(); RNAFoldingTools.saveCtFile(new File(outDir, name=title+".fuzzy.ct"), fuzzyPairedSitesObs, title, refSeq); fileList.add(name); fileDesc.add("Fuzzy alignment RNA structure using PPfold in Connect format"); RNAFoldingTools.saveDotBracketFile(new File(outDir, name=title+".fuzzy.dbn"), fuzzyPairedSitesObs, title, refSeq); fileList.add(name); fileDesc.add("Fuzzy alignment RNA structure using PPfold in Vienna format"); RNAFoldingTools.writeMatrix(RNAFoldingTools.getDoubleMatrix(fuzzyResultObs.finalmatrix), new File(outDir, name=title+".fuzzy.bp")); fileList.add(name); fileDesc.add("Fuzzy alignment RNA base-pairing matrix using PPfold"); //if(experimental) //{ //dataset.pp dataset.pairedSitesEntropyObs = fuzzyPairedSitesObs; dataset.ppfoldReliabilityEntropyObs = RNAFoldingTools.calculatePPfoldReliabilityScore(fuzzyPairedSitesObs, RNAFoldingTools.getDoubleMatrix(fuzzyResultObs.finalmatrix)); dataset.pairsOnlyReliabilityEntropyObs = RNAFoldingTools.calculatePairsOnlyReliabilityScore(fuzzyPairedSitesObs, RNAFoldingTools.getDoubleMatrix(fuzzyResultObs.finalmatrix)); dataset.pairsOnlyReliabilityEntropyObsPosteriorWeighted = RNAFoldingTools.calculatePairsOnlyReliabilityScore(fuzzyPairedSitesObs, RNAFoldingTools.getDoubleMatrix(fuzzyResultObs.finalmatrix), dataset.posteriors); dataset.fuzzyAlignmentObsEntropyVal = fuzzyResultObs.entropyVal; dataset.fuzzyAlignmentObsEntropyPerc = fuzzyResultObs.entropyPercOfMax; dataset.fuzzyAlignmentObsEntropyMax = fuzzyResultObs.entropyMax; //} } catch (InterruptedException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } } List<String> fileList; List<String> fileDesc; @Override public void afterLastSample() { fileList = new ArrayList<String>(); fileDesc = new ArrayList<String>(); String name; // fileList.add(title+".samples"); // fileDesc.add("RNA structure samples"); if(noSamples == 0) return; // calculate posterior avg posteriorProbabilityAvg = 0; for(int i = 0 ; i < mpdAlignment.decoding.length ; i++) { posteriorProbabilityAvg += mpdAlignment.decoding[i]; dataset.posteriors.add(mpdAlignment.decoding[i]); } posteriorProbabilityAvg /= (mpdAlignment.decoding.length); if(experimental) { dataset.posteriorsAverage = posteriorProbabilityAvg; } double[][] doubleSummedArrayRNAalifold = new double[d][d]; if(samplingAndAveragingRNAalifold) { for (int i = 0; i < d; ++i) { for (int j = 0; j < d; ++j) { doubleSummedArrayRNAalifold[i][j] = summedBasePairProbRNAalifold[i][j] / noSamples; } } int [] pairedSitesRNAalifold = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(doubleSummedArrayRNAalifold); dataset.pairedSitesRNAalifold = pairedSitesRNAalifold; dataset.ppfoldReliabilityScoreRNAalifold = RNAFoldingTools.calculatePPfoldReliabilityScore(dataset.pairedSitesRNAalifold, doubleSummedArrayRNAalifold); dataset.pairsOnlyReliabilityScoreRNAalifold = RNAFoldingTools.calculatePairsOnlyReliabilityScore(dataset.pairedSitesRNAalifold, doubleSummedArrayRNAalifold); RNAFoldingTools.saveCtFile(new File(outDir, name=title+".rnaalifold.ct"), pairedSitesRNAalifold, title, refSeq); fileList.add(name); fileDesc.add("Consensus RNA structure using RNAalifold in Connect format"); RNAFoldingTools.saveDotBracketFile(new File(outDir, name=title+".rnaalifold.dbn"), pairedSitesRNAalifold, title, refSeq); fileList.add(name); fileDesc.add("Consensus RNA structure using RNAalifold in Vienna format"); RNAFoldingTools.writeMatrix(doubleSummedArrayRNAalifold, new File(outDir, name=title+".rnaalifold.bp")); fileList.add(name); fileDesc.add("Consensus RNA base-pairing matrix using RNAalifold"); } double[][] doubleSummedArrayPPfold = new double[d][d]; if(samplingAndAveragingPPfold) { // average matrices, important for posterior decoding double[] doubleSingleBaseProb = new double[d]; for (int i = 0; i < d; ++i) { doubleSingleBaseProb[i] = summedSingleBaseProb[i] / noSamples; for (int j = 0; j < d; ++j) { doubleSummedArrayPPfold[i][j] = summedBasePairProbMatrix[i][j] / noSamples; } } int[] pairedSites = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(doubleSummedArrayPPfold); double statalignPpfoldReliablityScore = RNAFoldingTools.calculatePPfoldReliabilityScore(pairedSites, doubleSummedArrayPPfold); RNAFoldingTools.saveCtFile(new File(outDir, name=title+".ppfold.ct"), pairedSites, title, refSeq); fileList.add(name); fileDesc.add("Consensus RNA structure using PPfold in Connect format"); RNAFoldingTools.saveDotBracketFile(new File(outDir, name=title+".ppfold.dbn"), pairedSites, title, refSeq); fileList.add(name); fileDesc.add("Consensus RNA structure using PPfold in Vienna format"); RNAFoldingTools.writeMatrix(doubleSummedArrayPPfold, new File(outDir, name=title+".ppfold.bp")); fileList.add(name); fileDesc.add("Consensus RNA base-pairing matrix using PPfold"); //if(experimental) //{ for (int i = 0; i < d; ++i) { for (int j = 0; j < d; ++j) { weightedBasePairProb[i][j] = weightedBasePairProb[i][j] / noSamples; } } dataset.pairedSitesWeighted = RNAFoldingTools.getPosteriorDecodingConsensusStructure(weightedBasePairProb); dataset.ppfoldReliabilityScoreSamplingAndAveragingWeighted = RNAFoldingTools.calculatePPfoldReliabilityScore(dataset.pairedSitesWeighted, weightedBasePairProb); dataset.pairsOnlyReliabilityScoreSamplingAndAveragingWeighted = RNAFoldingTools.calculatePairsOnlyReliabilityScore(dataset.pairedSitesWeighted, weightedBasePairProb); dataset.pairedSites = pairedSites; dataset.ppfoldReliabilityScoreSamplingAndAveraging = statalignPpfoldReliablityScore; dataset.pairsOnlyReliabilityScoreSamplingAndAveraging = RNAFoldingTools.calculatePairsOnlyReliabilityScore(pairedSites, doubleSummedArrayPPfold); dataset.pairsOnlyReliabilityScoreSamplingAndAveragingPosteriorWeighted = RNAFoldingTools.calculatePairsOnlyReliabilityScore(pairedSites, doubleSummedArrayPPfold, dataset.posteriors); dataset.pairedSitesRefSeq = refSeqGapped; for (int i = 0; i < d; ++i) { for (int j = 0; j < d; ++j) { weightedBasePairProb[i][j] /= weightedSum; } } int [] pairedSitesWeighted= rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(weightedBasePairProb); double statalignWeightedPpfoldReliablityScore = RNAFoldingTools.calculatePPfoldReliabilityScore(pairedSites, weightedBasePairProb); dataset.pairedSitesWeighted = pairedSitesWeighted; dataset.ppfoldReliabilityScoreSamplingAndAveragingWeighted = statalignWeightedPpfoldReliablityScore; dataset.pairsOnlyReliabilityScoreSamplingAndAveragingWeighted = RNAFoldingTools.calculatePairsOnlyReliabilityScore(pairedSitesWeighted, weightedBasePairProb); for(int i = 0 ; i < mpdAlignment.alignment.length && i < dataset.inputAlignment.names.size() ; i++) { dataset.mpdAlignment.names.add(dataset.inputAlignment.names.get(i)); dataset.mpdAlignment.sequences.add(mpdAlignment.alignment[i]); } dataset.ppfoldReliabilityScoreSamplingAndAveraging = statalignPpfoldReliablityScore; double ppfoldReliablityScore = RNAFoldingTools.calculatePPfoldReliabilityScore(pairedSites, doubleSummedArrayPPfold); double proxySim = getProxySimilarityFromAvgPosterior(posteriorProbabilityAvg); ArrayList<String> mpdSequences = new ArrayList<String>(); ArrayList<String> mpdNames = new ArrayList<String>(); for(int i = 0 ; i < mpdAlignment.alignment.length ; i++) { mpdNames.add(mpdAlignment.sequenceNames[i]); mpdSequences.add(mpdAlignment.alignment[i]); } dataset.mpdVsInputSim = Distance.AMA(mpdSequences, dataset.inputAlignment.sequences); calculateMPD(mpdSequences, mpdNames); //System.out.println("Proxy distance = " + Distance.amaScoreToMultiDistance(mpdSequences,proxySim)); double improvedReliabilityScore1 = posteriorProbabilityAvg*statalignPpfoldReliablityScore; //System.out.println("Improved reliability score 1 = " + improvedReliabilityScore1); double improvedReliabilityScore2 = proxySim*statalignPpfoldReliablityScore; //} ResultBundle consensusEvolutionPrediction = performConsensusEvolutionPrediction(); RNAFoldingTools.saveCtFile(new File(outDir, name=title+".cons_evol.ct"), dataset.pairedSitesMatrix, title, refSeq); fileList.add(name); fileDesc.add("Consensus evolution RNA structure in Connect format"); RNAFoldingTools.saveDotBracketFile(new File(outDir, name=title+".cons_evol.dbn"), dataset.pairedSitesMatrix, title, refSeq); fileList.add(name); fileDesc.add("Consensus evolution RNA structure in Vienna format"); RNAFoldingTools.writeMatrix(RNAFoldingTools.getDoubleMatrix(consensusEvolutionPrediction.finalmatrix), new File(outDir, name=title+".cons_evol.bp")); fileList.add(name); fileDesc.add("Consensus evolution RNA base-pairing matrix"); } if(samplingAndAveragingPPfold && samplingAndAveragingRNAalifold) { double [][] combinedBasePairProb = new double[d][d]; for(int i = 0 ; i < d ; i++) { for(int j = 0 ; j < d ; j++) { combinedBasePairProb[i][j] = (doubleSummedArrayPPfold[i][j]+doubleSummedArrayRNAalifold[i][j])/2; } } int [] combinedPairedSites = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(combinedBasePairProb); dataset.pairedSitesCombined = combinedPairedSites; dataset.ppfoldReliabilityScoreCombined = RNAFoldingTools.calculatePPfoldReliabilityScore(combinedPairedSites, combinedBasePairProb); dataset.pairsOnlyReliabilityScoreCombined = RNAFoldingTools.calculatePairsOnlyReliabilityScore(combinedPairedSites, combinedBasePairProb); RNAFoldingTools.saveCtFile(new File(outDir, name=title+".rnaalifold_and_ppfold.ct"), combinedPairedSites, title, refSeq); fileList.add(name); fileDesc.add("Consensus RNA structure using RNAalifold and PPfold combined, in Connect format"); RNAFoldingTools.saveDotBracketFile(new File(outDir, name=title+".rnaalifold_and_ppfold.dbn"), combinedPairedSites, title, refSeq); fileList.add(name); fileDesc.add("Consensus RNA structure using RNAalifold and PPfold combined, in Vienna format"); RNAFoldingTools.writeMatrix(combinedBasePairProb, new File(outDir, name=title+".rnaalifold_and_ppfold.bp")); fileList.add(name); fileDesc.add("Consensus RNA base-pairing matrix using RNAalifold and PPfold combined"); } if(fuzzyFolding) { computeFuzzyAlignment(); } if(experimental) { dataset.saveDatasetResult(new File(outDir, name=title+".serialized")); fileList.add(name); fileDesc.add("RNA structure dataset serialized"); } DecimalFormat df = new DecimalFormat("0.000"); try { File outFile = new File(outDir, name=title+".info"); fileList.add(name); fileDesc.add("RNA secondary structure info file"); BufferedWriter buffer = new BufferedWriter(new FileWriter(outFile)); if(samplingAndAveragingPPfold) { buffer.write(">method=sampling_and_averaging_ppfold\ttitle="+title+"\tlength="+dataset.pairedSites.length+"\treliability_score="+df.format(dataset.pairsOnlyReliabilityScoreSamplingAndAveraging)+"\n"); buffer.write(refSeqGapped.replaceAll("-", "")+"\n"); buffer.write(RNAFoldingTools.getDotBracketStringFromPairedSites(dataset.pairedSites)+"\n"); ResultBundle lastResult = dataset.matrixFolds.get(dataset.matrixFolds.size()-1); buffer.write(">method=consensus_evolution_ppfold\ttitle="+title+"\tlength="+dataset.pairedSitesMatrix.length+"\treliability_score="+df.format(dataset.pairsOnlyReliabilityScoreConsensusEvol)+"\tentropy="+lastResult.entropyVal+"\tperc_of_max_entropy="+lastResult.entropyPercOfMax+"\t"+"\tmax_entropy="+lastResult.entropyMax+"\t"+"\n"); buffer.write(refSeqGapped.replaceAll("-", "")+"\n"); buffer.write(RNAFoldingTools.getDotBracketStringFromPairedSites(dataset.pairedSitesMatrix)+"\n"); } if(samplingAndAveragingRNAalifold) { buffer.write(">method=sampling_and_averaging_rnaalifold\ttitle="+title+"\tlength="+dataset.pairedSitesRNAalifold.length+"\treliability_score="+df.format(dataset.pairsOnlyReliabilityScoreRNAalifold)+"\n"); buffer.write(refSeqGapped.replaceAll("-", "")+"\n"); buffer.write(RNAFoldingTools.getDotBracketStringFromPairedSites(dataset.pairedSitesRNAalifold)+"\n"); } if(samplingAndAveragingPPfold && samplingAndAveragingRNAalifold) { buffer.write(">method=sampling_and_averaging_rnaalifold_and_ppfold\ttitle="+title+"\tlength="+dataset.pairedSitesCombined.length+"\treliability_score="+df.format(dataset.pairsOnlyReliabilityScoreCombined)+"\n"); buffer.write(refSeqGapped.replaceAll("-", "")+"\n"); buffer.write(RNAFoldingTools.getDotBracketStringFromPairedSites(dataset.pairedSitesCombined)+"\n"); } if(fuzzyFolding) { buffer.write(">method=fuzzy_alignment_ppfold\ttitle="+title+"\tlength="+dataset.pairedSitesEntropyObs.length+"\treliability_score="+df.format(dataset.pairsOnlyReliabilityEntropyObs)+"\tentropy="+dataset.fuzzyAlignmentObsEntropyVal+"\tperc_of_max_entropy="+dataset.fuzzyAlignmentObsEntropyPerc+"\t"+"\tmax_entropy="+dataset.fuzzyAlignmentObsEntropyMax+"\t"+"\n"); buffer.write(refSeqGapped.replaceAll("-", "")+"\n"); buffer.write(RNAFoldingTools.getDotBracketStringFromPairedSites(dataset.pairedSitesEntropyObs)+"\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } } public void saveScores(File outFile, double ppfoldReliabilityStatAlign, double ppfoldReliabilityMPD) { boolean writeHeaders = !outFile.exists(); try { BufferedWriter buffer = new BufferedWriter(new FileWriter(outFile, !writeHeaders)); if(writeHeaders) { buffer.write("Dataset\tPosterior avg.\tProxy similarity\tProxy distance\tPPfold reliability (StatAlign)\tPPfold reliability(MPD)\tBurn-in\tCycles\tSample rate\n"); } double proxySimilarity = getProxySimilarityFromAvgPosterior(posteriorProbabilityAvg); ArrayList<String> sequences = new ArrayList<String>(); for(int i = 0 ; i < mpdAlignment.alignment.length ; i++) { sequences.add(mpdAlignment.alignment[i]); } double proxyDistance = Distance.amaScoreToDistance(sequences,proxySimilarity); buffer.write(title+"\t"); buffer.write(posteriorProbabilityAvg+"\t"); buffer.write(proxySimilarity+"\t"); buffer.write(proxyDistance+"\t"); buffer.write(ppfoldReliabilityStatAlign+"\t"); buffer.write(ppfoldReliabilityMPD+"\t"); buffer.write(mcmc.mcmcpars.burnIn+"\t"); buffer.write(mcmc.mcmcpars.cycles+"\t"); buffer.write(mcmc.mcmcpars.sampRate+"\t"); buffer.write("\n"); buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } } public static double getProxySimilarityFromAvgPosterior(double avgPosterior) { return Math.min(1, avgPosterior*0.8084689957 + 0.2011076631); } public static void appendAlignment(String label, String [][] alignment, File outFile, boolean append) { try { BufferedWriter buffer = new BufferedWriter(new FileWriter(outFile, append)); buffer.write("%"+label+"\n"); for (int i = 0; i < alignment.length; i++) { buffer.write(">"+alignment[i][0] + "\n"); buffer.write(alignment[i][1] + "\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } } public static String getSequenceByName(String[][] sequences, String name) { for (int i = 0; i < sequences.length; i++) { if (sequences[i] != null && sequences[i][0].equals(name)) { return sequences[i][1]; } } return null; } public static void saveResult(String sequence, int[] pairedSites, double[][] basePairProb, double[] singleBaseProb, File outFile) { DecimalFormat df = new DecimalFormat("0.0000"); try { BufferedWriter buffer = new BufferedWriter(new FileWriter(outFile)); buffer.write(sequence + "\n"); buffer.write(RNAFoldingTools .getDotBracketStringFromPairedSites(pairedSites) + "\n"); for (int k = 0; k < pairedSites.length; k++) { if (pairedSites[k] == 0) { buffer.write(RNAFoldingTools.pad((k + 1) + "", 4) + "\t" + RNAFoldingTools.pad(pairedSites[k] + "", 7) + RNAFoldingTools.pad("-", 6) + "\t" + RNAFoldingTools.pad(df.format(singleBaseProb[k]) + "", 6) + "\n"); } else { buffer.write(RNAFoldingTools.pad((k + 1) + "", 4) + "\t" + RNAFoldingTools.pad(pairedSites[k] + "", 7) + RNAFoldingTools.pad( df.format(basePairProb[k][pairedSites[k] - 1]), 6) + "\t" + RNAFoldingTools.pad(df.format(singleBaseProb[k]) + "", 6) + "\n"); } } buffer.close(); } catch (IOException ex) { ex.printStackTrace(); } } public static com.ppfold.algo.Tree getPPfoldTree(Mcmc mcmc) { try { return NewickReader.parse(mcmc.tree.printedTree()); } catch (Exception ex) { ex.printStackTrace(); } return null; } public String[][] getSequences() { return t; } public static String getRefName() { return refSeqName; } public static void appendFolds(File file, String name, String alignedSequence, int [] pairedSites, int [] projectedPairedSites, boolean append) { try { BufferedWriter buffer = new BufferedWriter(new FileWriter(file, append)); buffer.write(">"+name+"\n"); buffer.write(alignedSequence+"\n"); buffer.write(RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSites)+"\n"); buffer.write(alignedSequence.replaceAll("-", "")+"\n"); buffer.write(RNAFoldingTools.getDotBracketStringFromPairedSites(projectedPairedSites)+"\n"); buffer.close(); } catch (IOException e) { e.printStackTrace(); } } private boolean getTheNextSample(ArrayList<Double> distanceList, int sampleNumber){ if(sampleNumber > 40 && isThereMajorIncrease(distanceList)){ return true; } else{ return false; } } private boolean isThereMajorIncrease(ArrayList<Double> distanceList){ final int SAMPLES = 4; double[] lastSAMPLESDifference = new double[SAMPLES]; double[] lastSAMPLES = new double[SAMPLES]; double average = 0; double min = 99999999; for(int i = SAMPLES-1; i>=0; --i){ int indexBigger = distanceList.size()-i-1; int indexSmaller = indexBigger-1; lastSAMPLESDifference[i] = distanceList.get(indexBigger) - distanceList.get(indexSmaller); lastSAMPLES[i] = distanceList.get(i); min = Math.min(min,lastSAMPLESDifference[i]); } return true; } private int mapSeqIDtoMinimumNumberOfSamplesTotake(double seqID){ return 40; } public static ArrayList<String> loadFolds(File file, int line) { ArrayList<String> list = new ArrayList<String>(); try { BufferedReader buffer = new BufferedReader(new FileReader(file)); String textline = null; int lines = 0; while((textline = buffer.readLine()) != null) { if((lines - line) % 5 == 0) { list.add(textline); } lines++; } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } return list; } }