package statalign.postprocess.plugins.benchmarks; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import com.ppfold.algo.FuzzyAlignment; import com.ppfold.algo.ResultBundle; import statalign.postprocess.plugins.PPFold; import statalign.postprocess.plugins.contree.hash.HashTable; import statalign.postprocess.utils.Mapping; import statalign.postprocess.utils.RNAFoldingTools; public class Benchmarks { public static void main(String[] args) { Benchmarks.automatedTests(); //Benchmarks.testVariation2(); //new Benchmarks().performEntropy(); //Benchmarks.automatedTest2(); System.exit(0); /* //Benchmarks.testData(); Benchmarks.automatedTest2(); //.automatedTest(); System.exit(0); */ String dir = "/home/michael/Dropbox/RNA and StatAlign/Distance/Datasets2/"; File [] files = new File(dir).listFiles(); for(int i = 0 ; i < files.length ; i++) { if(files[i].getName().toLowerCase().endsWith(".dat")) { ExperimentalData expData = Benchmarks.loadExperimentalStructure(files[i]); saveAsFasta(expData, new File(dir+files[i].getName()+".fas")); } } dir = "/home/michael/Dropbox/RNA and StatAlign/Distance/Datasets2/"; files = new File(dir).listFiles(); for(int i = 0 ; i < files.length ; i++) { if(files[i].getName().toLowerCase().endsWith(".ct")) { try { BufferedWriter buffer = new BufferedWriter(new FileWriter(new File(dir+files[i].getName()+".dbn"))); buffer.write(RNAFoldingTools.getDotBracketStringFromCtFile(files[i])+"\n"); buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } } } /* ExperimentalData expData = Benchmarks.loadExperimentalStructure(new File("C:/Oxford/TestRNAData.tar/TestRNAData/TestRNAData1.dat")); ExperimentalData predictedData = Benchmarks.loadExperimentalStructure(new 
File("C:/Oxford/TestRNAData.tar/TestRNAData/TestRNAData1Predicted.dat")); System.out.println(calculateSensitivity(expData.pairedSites, predictedData.pairedSites)); System.out.println(calculatePPV(expData.pairedSites, predictedData.pairedSites)); System.out.println(calculateFScore(expData.pairedSites, predictedData.pairedSites)); */ } public static void testData() { String name = "TestRNAData1"; String dir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/"; String resultsDir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/Results0/"; File experimentalFile = new File(dir+name+".dat"); ExperimentalData experimentalData = Benchmarks.loadExperimentalStructure(experimentalFile); StatAlignResult statalignResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res")); //System.out.println(statalignResult.sequence+""); int [] pairedSitesExperimental = projectPairedSites(statalignResult.sequence, experimentalData.pairedSites); int [] pairedSitesStatAlign = statalignResult.pairedSites; File ppfoldData = new File("/home/michael/Dropbox/RNA and StatAlign/TestRNAData/StatAlign/1.ct"); /* double [][] bpMatrix = RNAFoldingTools.loadMatrix(new File("/home/michael/Dropbox/RNA and StatAlign/TestRNAData/StatAlign/1.bp")); float [][] floatMatrix = new float[bpMatrix.length][bpMatrix[0].length]; for(int i = 0; i < bpMatrix.length ; i++) { for(int j = 0; j < bpMatrix[0].length ; j++) { floatMatrix[i][j] = (float) bpMatrix[i][j]; } } String s = "GGGCGCCCGAGGCCGCCCGCCCCGGGCACGCCACCGCAAG------GCAGACAGAGAAAAGCCCCAGCCAACACCACGCGCCCCGCAAGACGCCCAACACCAA-CCCGAGGCCCAAC-CCACGCCCCACAAACGCAGGCCAGCCCCCCACGCGCCGAAAGGCAAG---GAGAAGCAGGCCACGAAG"; float [][] projectMatrix = Mapping.projectMatrix(s, floatMatrix, '-'); double [][] doubleMatrix = new double[projectMatrix.length][projectMatrix[0].length]; for(int i = 0; i < doubleMatrix.length ; i++) { for(int j = 0; j < doubleMatrix[0].length ; j++) { doubleMatrix[i][j] = floatMatrix[i][j]; } } */ //int [] pairedSitesPPfold = 
RNAFoldingTools.getPosteriorDecodingConsensusStructure(doubleMatrix); System.out.println(RNAFoldingTools.getDotBracketStringFromPairedSites(RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData))); int [] pairedSitesPPfold = projectPairedSites("GGGCGCCCGAGGCCGCCCGCCCCGGGCACGCCACCGCAAG------GCAGACAGAGAAAAGCCCCAGCCAACACCACGCGCCCCGCAAGACGCCCAACACCAA-CCCGAGGCCCAAC-CCACGCCCCACAAACGCAGGCCAGCCCCCCACGCGCCGAAAGGCAAG---GAGAAGCAGGCCACGAAG", RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); //File ppfoldData = new File("/host/Oxford/test1_mpd.ct"); //int [] pairedSitesPPfold = projectPairedSites("GGGCGCCCGAGGCCGCCCGCCCCGGGCACGCCACCGCAAG------GCAGACAGAGAAAAGCCCCAGCCAACACCACGCGCCCCGCAAGACGCCCAACACCAA-CCCGAGGCCCAAC-CCACGCCCCACAAACGCAGGCCAGCCCCCCACGCGCCGAAAGGCAAG---GAGAAGCAGGCCACGAAG", RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); //printPairs(pairedSitesPPfold); System.out.println("LENGTH:"+pairedSitesStatAlign.length); System.out.println(">"+name + " (" + (statalignResult.sequence.replaceAll("-", "").length())+")"); System.out.println(" "+statalignResult.sequence.replaceAll("-", "")); System.out.println("E:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesExperimental)); System.out.println("S:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesStatAlign)); System.out.println(" "+statalignResult.sequence.replaceAll("-", "")); System.out.println("E:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesExperimental)); System.out.println("P:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesPPfold)); double sensExpStat = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesStatAlign); double sensExpPPfold = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesPPfold); double ppvExpStat = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesStatAlign); double ppvExpPPfold = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesPPfold); double fscExpStat = 
Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlign); double fscExpPPfold = Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesPPfold); System.out.println(RNAFoldingTools.pad("", 14)+RNAFoldingTools.pad("StatAl", 10)+RNAFoldingTools.pad("PPfold", 10)); System.out.println(RNAFoldingTools.pad("Senstivity", 14)+RNAFoldingTools.pad(sensExpStat+"", 6)+" "+RNAFoldingTools.pad(sensExpPPfold+"", 6)); System.out.println(RNAFoldingTools.pad("PPV", 14)+RNAFoldingTools.pad(ppvExpStat+"", 6)+" "+RNAFoldingTools.pad(ppvExpPPfold+"", 6)); System.out.println(RNAFoldingTools.pad("F-score", 14)+RNAFoldingTools.pad(fscExpStat+"", 6)+" "+RNAFoldingTools.pad(fscExpPPfold+"", 6)); System.out.print("StatAl: "); printValues(pairedSitesExperimental, pairedSitesStatAlign); System.out.print("PPfold: "); printValues(pairedSitesExperimental, pairedSitesPPfold); System.out.println("---------------------------------------------------------------------"); System.out.println(); } public static void automatedTest() { String dir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/"; String resultsDir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/Results0/"; File [] files = new File(resultsDir).listFiles(); for(int i = 0 ; i < files.length ; i++) { String fullName = files[i].getName(); if(fullName.endsWith(".dat.res")) { String name = fullName.substring(0, fullName.length()-8); //File files = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/").listFiles(); File experimentalFile = new File(dir+name+".dat"); //File ourData = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNADATA1OURS"); //File statalignResultFile = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNAData1.dat.txt"); File ppfoldData = new File(dir+name+".dat.ct"); ExperimentalData experimentalData = Benchmarks.loadExperimentalStructure(experimentalFile); StatAlignResult statalignResult = loadStatAlignResultFile(new 
File(resultsDir+"/"+name+".dat.res")); String mappingSeq = ""; for(int j = 0 ;j < experimentalData.sequences.size() ; j++) { if(experimentalData.sequences.get(j).replaceAll("-", "").equals(statalignResult.sequence.replaceAll("-", ""))) { mappingSeq = experimentalData.sequences.get(j); } } //System.out.println(mappingSeq); //System.out.println(statalignResult.sequence); int [] pairedSitesExperimental = projectPairedSites(mappingSeq, experimentalData.pairedSites); int [] pairedSitesStatAlign = statalignResult.pairedSites; int [] pairedSitesPPfold = projectPairedSites(mappingSeq, RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); //printPairs(pairedSitesExperimental); //printPairs(pairedSitesStatAlign); //printPairs(pairedSitesPPfold); //System.out.println("X:"+statalignResult.sequence); //System.out.println("X:"+experimentalData.sequences.get(2)); System.out.println(">"+name + " (" + (statalignResult.sequence.replaceAll("-", "").length())+")"); System.out.println(" "+statalignResult.sequence.replaceAll("-", "")); System.out.println("E:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesExperimental)); System.out.println("S:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesStatAlign)); System.out.println(" "+statalignResult.sequence.replaceAll("-", "")); System.out.println("E:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesExperimental)); System.out.println("P:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesPPfold)); double sensExpStat = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesStatAlign); double sensExpPPfold = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesPPfold); double ppvExpStat = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesStatAlign); double ppvExpPPfold = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesPPfold); double fscExpStat = Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlign); double fscExpPPfold 
= Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesPPfold); System.out.println(RNAFoldingTools.pad("", 14)+RNAFoldingTools.pad("StatAl", 10)+RNAFoldingTools.pad("PPfold", 10)); System.out.println(RNAFoldingTools.pad("Senstivity", 14)+RNAFoldingTools.pad(sensExpStat+"", 6)+" "+RNAFoldingTools.pad(sensExpPPfold+"", 6)); System.out.println(RNAFoldingTools.pad("PPV", 14)+RNAFoldingTools.pad(ppvExpStat+"", 6)+" "+RNAFoldingTools.pad(ppvExpPPfold+"", 6)); System.out.println(RNAFoldingTools.pad("F-score", 14)+RNAFoldingTools.pad(fscExpStat+"", 6)+" "+RNAFoldingTools.pad(fscExpPPfold+"", 6)); System.out.print("StatAl: "); printValues(pairedSitesExperimental, pairedSitesStatAlign); System.out.print("PPfold: "); printValues(pairedSitesExperimental, pairedSitesPPfold); System.out.println("---------------------------------------------------------------------"); System.out.println(); } } } /*public static void automatedTest2() { String dir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/"; String resultsDir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/Results3/"; File [] files = new File(resultsDir).listFiles(); for(int i = 0 ; i < files.length ; i++) { String fullName = files[i].getName(); if(fullName.endsWith(".dat.res")) { String name = fullName.substring(0, fullName.length()-8); //File files = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/").listFiles(); File experimentalFile = new File(dir+name+".dat"); //File ourData = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNADATA1OURS"); //File statalignResultFile = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNAData1.dat.txt"); File ppfoldData = new File(dir+name+".dat.ct"); ExperimentalData experimentalData = Benchmarks.loadExperimentalStructure(experimentalFile); StatAlignResult statalignResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res")); StatAlignResult statalignWeightedResult = loadStatAlignResultFile(new 
File(resultsDir+"/"+name+".dat.res.weighted")); System.out.println(name); StatAlignResult mpdResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res.mpd")); String mappingSeq = ""; for(int j = 0 ;j < experimentalData.sequences.size() ; j++) { if(experimentalData.sequences.get(j).replaceAll("-", "").equals(statalignResult.sequence.replaceAll("-", ""))) { mappingSeq = experimentalData.sequences.get(j); } } //System.out.println(mappingSeq); //System.out.println(statalignResult.sequence); int [] pairedSitesExperimental = projectPairedSites(mappingSeq, experimentalData.pairedSites); int [] pairedSitesStatAlign = statalignResult.pairedSites; int [] pairedSitesStatAlignWeighted = statalignWeightedResult.pairedSites; int [] pairedSitesPPfold = projectPairedSites(mappingSeq, RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); int [] pairedSitesMPD = mpdResult.pairedSites; //printPairs(pairedSitesExperimental); //printPairs(pairedSitesStatAlign); //printPairs(pairedSitesPPfold); //System.out.println("X:"+statalignResult.sequence); //System.out.println("X:"+experimentalData.sequences.get(2)); System.out.println(">"+name + " (" + (statalignResult.sequence.replaceAll("-", "").length())+")"); System.out.println(" "+statalignResult.sequence.replaceAll("-", "")); System.out.println("E:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesExperimental)); System.out.println("S:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesStatAlign)); System.out.println(" "+statalignResult.sequence.replaceAll("-", "")); System.out.println("E:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesExperimental)); System.out.println("P:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesPPfold)); double sensExpStat = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesStatAlign); double sensExpPPfold = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesPPfold); double ppvExpStat = 
Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesStatAlign); double ppvExpPPfold = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesPPfold); double fscExpStat = Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlign); double fscExpPPfold = Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesPPfold); double fscExpStatWeighted =Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlignWeighted); double fscExpStatMPD=Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesMPD); System.out.println(RNAFoldingTools.pad("", 14)+RNAFoldingTools.pad("StatAl", 10)+RNAFoldingTools.pad("PPfold", 10)); System.out.println(RNAFoldingTools.pad("Senstivity", 14)+RNAFoldingTools.pad(sensExpStat+"", 6)+" "+RNAFoldingTools.pad(sensExpPPfold+"", 6)); System.out.println(RNAFoldingTools.pad("PPV", 14)+RNAFoldingTools.pad(ppvExpStat+"", 6)+" "+RNAFoldingTools.pad(ppvExpPPfold+"", 6)); System.out.println(RNAFoldingTools.pad("F-score", 14)+RNAFoldingTools.pad(fscExpStat+"", 6)+" "+RNAFoldingTools.pad(fscExpPPfold+"", 6)); System.out.print("StatAl: "); printValues(pairedSitesExperimental, pairedSitesStatAlign); System.out.print("PPfold: "); printValues(pairedSitesExperimental, pairedSitesPPfold); System.out.println("---------------------------------------------------------------------"); System.out.println(); //System.out.println("XXXXXXXXXXXXXX"+resultsDir+name+".folds"); if(new File(resultsDir+name+".folds").exists()) { ArrayList<String> structures = PPFold.loadFolds(new File(resultsDir+name+".folds"), 4); ArrayList<String> values = new ArrayList<String>(); for(int k= 0 ; k < structures.size() ; k++) { String val = "" + Benchmarks.calculateFScore(pairedSitesExperimental, RNAFoldingTools.getPairedSitesFromDotBracketString(structures.get(k))); values.add(val); } //System.out.println(values); try { BufferedWriter buffer = new BufferedWriter(new FileWriter(resultsDir+name+".hist")); buffer.write("ST="+fscExpStat+"\n"); 
buffer.write("STW="+fscExpStatWeighted+"\n"); buffer.write("MPD="+fscExpStatMPD+"\n"); buffer.write("PP="+fscExpPPfold+"\n"); for(int l = 0 ; l < values.size() ; l++) { double val = Double.parseDouble(values.get(l)); if(Double.isNaN(val)) { val = 0; } buffer.write(val+"\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } String dir2 = "/home/michael/workspace/StatAlign/"; System.out.println(new File(dir2+name+".dat.fas.folds_e_obs")); if(new File(dir2+name+".dat.fas.folds_e_obs").exists()) { String dbn = PPFold.loadFolds(new File(dir2+name+".dat.fas.folds_e_obs"), 4).get(0); double fscSamplingObs = Benchmarks.calculateFScore(pairedSitesExperimental, RNAFoldingTools.getPairedSitesFromDotBracketString(dbn)); System.out.println("Alignment sampling obs FSC"+fscSamplingObs); dbn = PPFold.loadFolds(new File(dir2+name+".dat.fas.folds_e_exp"), 4).get(0); double fscSamplingExp = Benchmarks.calculateFScore(pairedSitesExperimental, RNAFoldingTools.getPairedSitesFromDotBracketString(dbn)); System.out.println("Alignment sampling exp FSC"+fscSamplingExp); } //System.out.println(structures); } } } }*/ public static void automatedTest2() { //String dir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/Distance/Datasets2/"; String dir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/"; //String resultsDir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/Results3/"; String resultsDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/5seq2/"; File [] files = new File(resultsDir).listFiles(); for(int i = 0 ; i < files.length ; i++) { String fullName = files[i].getName(); if(fullName.endsWith(".dat.res")) { String name = fullName.substring(0, fullName.length()-8); String smallname = fullName.substring(0, fullName.length()-16); System.out.println(name); String truncname = smallname.replaceAll("_5seqs", ""); //File files = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/").listFiles(); File 
experimentalFile = new File(dir+truncname+".dat"); //File ourData = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNADATA1OURS"); //File statalignResultFile = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNAData1.dat.txt"); File ppfoldData = new File(dir+truncname+".dat.ct"); ExperimentalData experimentalData = Benchmarks.loadExperimentalStructure(experimentalFile); StatAlignResult statalignResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res")); StatAlignResult statalignWeightedResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res.weighted")); System.out.println(name); StatAlignResult mpdResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res.mpd")); String mappingSeq = ""; for(int j = 0 ;j < experimentalData.sequences.size() ; j++) { if(experimentalData.sequences.get(j).replaceAll("-", "").equals(statalignResult.sequence.replaceAll("-", ""))) { mappingSeq = experimentalData.sequences.get(j); } } //System.out.println(mappingSeq); //System.out.println(statalignResult.sequence); int [] pairedSitesExperimental = projectPairedSites(mappingSeq, experimentalData.pairedSites); int [] pairedSitesStatAlign = statalignResult.pairedSites; int [] pairedSitesStatAlignWeighted = statalignWeightedResult.pairedSites; int [] pairedSitesPPfold = projectPairedSites(mappingSeq, RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); int [] pairedSitesMPD = mpdResult.pairedSites; //printPairs(pairedSitesExperimental); //printPairs(pairedSitesStatAlign); //printPairs(pairedSitesPPfold); //System.out.println("X:"+statalignResult.sequence); //System.out.println("X:"+experimentalData.sequences.get(2)); System.out.println(">"+name + " (" + (statalignResult.sequence.replaceAll("-", "").length())+")"); System.out.println(" "+statalignResult.sequence.replaceAll("-", "")); System.out.println("E:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesExperimental)); 
System.out.println("S:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesStatAlign)); System.out.println(" "+statalignResult.sequence.replaceAll("-", "")); System.out.println("E:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesExperimental)); System.out.println("P:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesPPfold)); double sensExpStat = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesStatAlign); double sensExpPPfold = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesPPfold); double ppvExpStat = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesStatAlign); double ppvExpPPfold = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesPPfold); double fscExpStat = Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlign); double fscExpPPfold = Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesPPfold); double fscExpStatWeighted =Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlignWeighted); double fscExpStatMPD=Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesMPD); double fscSamplingObs = -1; System.out.println(RNAFoldingTools.pad("", 14)+RNAFoldingTools.pad("StatAl", 10)+RNAFoldingTools.pad("PPfold", 10)); System.out.println(RNAFoldingTools.pad("Senstivity", 14)+RNAFoldingTools.pad(sensExpStat+"", 6)+" "+RNAFoldingTools.pad(sensExpPPfold+"", 6)); System.out.println(RNAFoldingTools.pad("PPV", 14)+RNAFoldingTools.pad(ppvExpStat+"", 6)+" "+RNAFoldingTools.pad(ppvExpPPfold+"", 6)); System.out.println(RNAFoldingTools.pad("F-score", 14)+RNAFoldingTools.pad(fscExpStat+"", 6)+" "+RNAFoldingTools.pad(fscExpPPfold+"", 6)); System.out.print("StatAl: "); printValues(pairedSitesExperimental, pairedSitesStatAlign); System.out.print("PPfold: "); printValues(pairedSitesExperimental, pairedSitesPPfold); System.out.println("---------------------------------------------------------------------"); System.out.println(); 
//String dir2 = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/5seq/"; String dir2 = resultsDir; //System.out.println(new File(dir2+smallname+".dat.fas.folds_e_obs").exists()); if(new File(dir2+smallname+".dat.fas.folds_e_obs").exists()) { String dbn = PPFold.loadFolds(new File(dir2+smallname+".dat.fas.folds_e_obs"), 4).get(0); fscSamplingObs = Benchmarks.calculateFScore(pairedSitesExperimental, RNAFoldingTools.getPairedSitesFromDotBracketString(dbn)); System.out.println("Alignment sampling obs FSC"+fscSamplingObs); dbn = PPFold.loadFolds(new File(dir2+smallname+".dat.fas.folds_e_exp"), 4).get(0); double fscSamplingExp = Benchmarks.calculateFScore(pairedSitesExperimental, RNAFoldingTools.getPairedSitesFromDotBracketString(dbn)); System.out.println("Alignment sampling exp FSC"+fscSamplingExp); } System.out.println(new File(resultsDir+name+".folds").exists()+"\t"+fscSamplingObs); //System.out.println("XXXXXXXXXXXXXX"+resultsDir+name+".folds"); if(new File(resultsDir+name+".folds").exists() && fscSamplingObs != -1) { ArrayList<String> structures = PPFold.loadFolds(new File(resultsDir+name+". 
>folds"), 4); ArrayList<String> values = new ArrayList<String>(); for(int k= 0 ; k < structures.size() ; k++) { String val = "" + Benchmarks.calculateFScore(pairedSitesExperimental, RNAFoldingTools.getPairedSitesFromDotBracketString(structures.get(k))); values.add(val); } //System.out.println(values); try { BufferedWriter buffer = new BufferedWriter(new FileWriter(resultsDir+name+".hist2")); buffer.write("ST="+fscExpStat+"\n"); buffer.write("STW="+fscExpStatWeighted+"\n"); buffer.write("MPD="+fscExpStatMPD+"\n"); buffer.write("PP="+fscExpPPfold+"\n"); buffer.write("STE="+fscSamplingObs+"\n"); for(int l = 0 ; l < values.size() ; l++) { double val = Double.parseDouble(values.get(l)); if(Double.isNaN(val)) { val = 0; } buffer.write(val+"\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } //System.out.println(structures); } } } } public static void performDistanceBenchmarks(Dataset dataset) { //File distanceFile = new File(System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/9seq2/dist_scores.txt"); /* String dataDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/Distance/Datasets2/"; String resultsDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/9seq2/"; File experimentalFile = new File(dataDir+name+".dat"); File ppfoldData = new File(dataDir+name+".dat.ct"); ExperimentalData experimentalData = Benchmarks.loadExperimentalStructure(experimentalFile); StatAlignResult statalignResult = loadStatAlignResultFile(new File(resultsDir+"/"+dataName+".dat.res")); // StatAlignResult statalignWeightedResult = loadStatAlignResultFile(new File(resultsDir+"/"+dataName+".dat.res.weighted")); StatAlignResult mpdResult = loadStatAlignResultFile(new File(resultsDir+"/"+dataName+".dat.res.mpd")); String mappingSeq = ""; for(int j = 0 ;j < experimentalData.sequences.size() ; j++) { if(experimentalData.sequences.get(j).replaceAll("-", "").equals(statalignResult.sequence.replaceAll("-", ""))) { mappingSeq = 
experimentalData.sequences.get(j); } } int [] pairedSitesExperimental = projectPairedSites(mappingSeq, experimentalData.pairedSites); int [] pairedSitesStatAlign = statalignResult.pairedSites; int [] pairedSitesStatAlignWeighted = statalignWeightedResult.pairedSites; int [] pairedSitesPPfold = projectPairedSites(mappingSeq, RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); int [] pairedSitesMPD = mpdResult.pairedSites; double sensExpStat = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesStatAlign); double sensExpPPfold = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesPPfold); double sensExpMPD=Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesMPD); double ppvExpStat = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesStatAlign); double ppvExpPPfold = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesPPfold); double ppvExpMPD = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesMPD); double fscExpStat = Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlign); double fscExpPPfold = Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesPPfold); double fscExpStatWeighted =Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlignWeighted); double fscExpStatMPD=Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesMPD); System.out.println(textline+"\t"+fscExpStat+"\t"+fscExpStatMPD+"\t"+sensExpStat+"\t"+sensExpMPD+"\t"+ppvExpStat+"\t"+ppvExpMPD); }*/ } public static double getDouble(double val) { if(Double.isNaN(val)) { return 0; } return val; } public static int [] getPairedSites(char [] structure) { String s = ""; for(int i = 0 ; i < structure.length ; i++) { s += structure[i]; } return RNAFoldingTools.getPairedSitesFromDotBracketString(s); } public static void automatedTests() { String dir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/"; String resultsDir = "/home/michael/workspace/StatAlignExecute/output/"; File 
outFile = new File("Benchmarks.txt"); String suffix = "_5seqs"; if(!suffix.equals("_5seqs")) { dir = "/home/michael/Dropbox/RNA and StatAlign/Distance/Datasets2/"; } String header = "dataset\tposterior_avg\tsim_mpd_ref\t" +"average_length\t" +"fsc_sample_mean\tfsc_sample_median\tfsc_stat\tfsc_stat_weighted\tfsc_mpd\tfsc_ppfold\tfsc_entropy_exp\tfsc_entropy_obs\t" +"rel_stat\trel_stat_weighted\trel_mpd\trel_ppfold\trel_entropy_exp\trel_entropy_obs\t" +"rel2_stat\trel2_stat_weighted\trel2_mpd\trel2_ppfold\trel2_entropy_exp\trel2_entropy_obs\t" +"entropy_exp\tentropy_perc_exp\tentropy_max_exp\t" +"entropy_obs\tentropy_perc_obs\tentropy_max_obs\t" +"entropy_mpd\tentropy_perc_mpd\tentropy_max_mpd\t" +"entropy_ppfold\tentropy_perc_ppfold\tentropy_max_ppfold\t" +"entropy_sample_mean\tentropy_perc_sample_mean\tentropy_max_sample_mean\t" +"fsc_sample_alifold_mean\tfsc_sample_alifold_median\tfsc_alifold\tfsc_alifold_mpd\talifold_ref\trel3_stat\trel3_mpd\trel3_entropy_obs\t" +"fsc_combined\tcombined\tcombined\t" +"sen_sample_mean\tsen_sample_median\tsen_stat\tsen_stat_weighted\tsen_mpd\tsen_ppfold\tsen_entropy_exp\tsen_entropy_obs\t" +"ppv_sample_mean\tppv_sample_median\tppv_stat\tppv_stat_weighted\tppv_mpd\tppv_ppfold\tppv_entropy_exp\tppv_entropy_obs\t" +"sen_sample_alifold_mean\tsen_sample_alifold_median\tsen_alifold\tsen_alifold_mpd\talifold_ref\t" +"ppv_sample_alifold_mean\tppv_sample_alifold_median\tppv_alifold\tppv_alifold_mpd\talifold_ref\t"; RNAFoldingTools.writeToFile(outFile, header, false); File [] files = new File(resultsDir).listFiles(); for(int i = 0 ; i < files.length ; i++) { if(!files[i].getName().contains("480298957") || !files[i].getName().contains(suffix)) { continue; } if(!files[i].getName().endsWith(".serialized")) { continue; } Dataset dataset = null; try { System.out.println(files[i]); dataset = Dataset.loadDatasetResult(files[i]); } catch(Exception ex) { System.err.println("Should delete "+files[i]); continue; } //String resultsDir = 
System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/5seq2/"; //String dir = "/home/michael/Dropbox/RNA and StatAlign/Distance/Datasets2/"; System.out.println(dataset.title); String name = dataset.title.replaceAll("_seed.+", ""); String smallname = name.substring(0, name.length()-8); System.out.println(name); //String truncname = smallname; String truncname = smallname.replaceAll(suffix, ""); //File files = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/").listFiles(); File experimentalFile = new File(dir+truncname+".dat"); //File ourData = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNADATA1OURS"); //File statalignResultFile = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNAData1.dat.txt"); File ppfoldData = new File(dir+truncname+".dat.ct"); if(!suffix.equals("_5seqs")) { experimentalFile = new File(dir+truncname+suffix+".dat"); ppfoldData = new File(dir+truncname+suffix+".dat.ct"); } ExperimentalData experimentalData = Benchmarks.loadExperimentalStructure(experimentalFile); //StatAlignResult statalignResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res")); //StatAlignResult statalignWeightedResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res.weighted")); System.out.println(name); //StatAlignResult mpdResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res.mpd")); String mappingSeq = ""; for(int j = 0 ;j < experimentalData.sequences.size() ; j++) { if(experimentalData.sequences.get(j).replaceAll("-", "").equals(dataset.pairedSitesRefSeq.replaceAll("-", ""))) { mappingSeq = experimentalData.sequences.get(j); } } //System.out.println(mappingSeq); //System.out.println(statalignResult.sequence); int [] pairedSitesExperimental = projectPairedSites(mappingSeq, experimentalData.pairedSites); int [] pairedSitesStatAlign = dataset.pairedSites; int [] pairedSitesStatAlignWeighted = dataset.pairedSitesWeighted; int [] pairedSitesPPfold = 
projectPairedSites(mappingSeq, RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); int [] pairedSitesMPD = dataset.pairedSitesMPD; //printPairs(pairedSitesExperimental); //printPairs(pairedSitesStatAlign); //printPairs(pairedSitesPPfold); //System.out.println("X:"+statalignResult.sequence); //System.out.println("X:"+experimentalData.sequences.get(2)); System.out.println(">"+name + " (" + (dataset.pairedSitesRefSeq.replaceAll("-", "").length())+")"); System.out.println(" "+dataset.pairedSitesRefSeq.replaceAll("-", "")); System.out.println("E:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesExperimental)); System.out.println("S:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesStatAlign)); System.out.println(" "+dataset.pairedSitesRefSeq.replaceAll("-", "")); System.out.println("E:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesExperimental)); System.out.println("P:"+RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSitesPPfold)); System.out.println(dataset.posteriorsAverage+"\t"+dataset.mpdVsInputSim); //System.out.println("M:"+RNAFoldingTools.getDotBracketStringFromPairedSites(getPairedSites(dataset.matrixFolds.get(dataset.matrixFolds.size()-1).getStructure()))+"\t"+dataset.matrixFolds.get(dataset.matrixFolds.size()-1).finalmatrix.length); //double sensExpStat = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesStatAlign); //double sensExpPPfold = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesPPfold); //double ppvExpStat = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesStatAlign); //double ppvExpPPfold = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesPPfold); double fscExpStat = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlign)); double fscExpPPfold = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesPPfold)); double fscExpStatWeighted = 
getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlignWeighted)); double fscExpStatMPD= getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesMPD)); double fscSamplingExp = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesEntropyExp)); double fscSamplingObs = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesEntropyObs)); double fscRNAalifold = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesRNAalifold)); double fscMatrixFold = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesMatrix)); double fscCombined = -1; if(dataset.pairedSitesCombined != null) { fscCombined = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesCombined)); } try { System.out.println(dataset.title); double fscRNAalifoldMPD = -1; if(dataset.pairedSitesRNAalifoldMPDProjected != null) { fscRNAalifoldMPD = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesRNAalifoldMPDProjected)); } double fscRNAalifoldRef = -1; if(dataset.pairedSitesRNAalifoldRefProjected != null) { fscRNAalifoldRef = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesRNAalifoldRefProjected)); } else { System.err.println("Could not load."); } double senExpStat = getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesStatAlign)); double senExpPPfold = getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesPPfold)); double senExpStatWeighted = getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesStatAlignWeighted)); double senExpStatMPD= getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesMPD)); double senSamplingExp = getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, dataset.pairedSitesEntropyExp)); double senSamplingObs = 
getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, dataset.pairedSitesEntropyObs)); double senRNAalifold = getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, dataset.pairedSitesRNAalifold)); double senCombined = -1; if(dataset.pairedSitesCombined != null) { senCombined = getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, dataset.pairedSitesCombined)); } System.out.println(dataset.title); double senRNAalifoldMPD = -1; if(dataset.pairedSitesRNAalifoldMPDProjected != null) { senRNAalifoldMPD = getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, dataset.pairedSitesRNAalifoldMPDProjected)); } double senRNAalifoldRef = -1; if(dataset.pairedSitesRNAalifoldRefProjected != null) { senRNAalifoldRef = getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, dataset.pairedSitesRNAalifoldRefProjected)); } else { System.err.println("Could not load."); } double ppvExpStat = getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesStatAlign)); double ppvExpPPfold = getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesPPfold)); double ppvExpStatWeighted = getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesStatAlignWeighted)); double ppvExpStatMPD= getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesMPD)); double ppvSamplingExp = getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, dataset.pairedSitesEntropyExp)); double ppvSamplingObs = getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, dataset.pairedSitesEntropyObs)); double ppvRNAalifold = getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, dataset.pairedSitesRNAalifold)); double ppvCombined = -1; if(dataset.pairedSitesCombined != null) { ppvCombined = getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, dataset.pairedSitesCombined)); } System.out.println(dataset.title); double ppvRNAalifoldMPD = -1; if(dataset.pairedSitesRNAalifoldMPDProjected != null) { 
getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, dataset.pairedSitesRNAalifoldMPDProjected)); } double ppvRNAalifoldRef = -1; if(dataset.pairedSitesRNAalifoldRefProjected != null) { ppvRNAalifoldRef = getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, dataset.pairedSitesRNAalifoldRefProjected)); } else { System.err.println("Could not load."); } //System.out.println(RNAFoldingTools.pad("", 14)+RNAFoldingTools.pad("StatAl", 10)+RNAFoldingTools.pad("PPfold", 10)); //System.out.println(RNAFoldingTools.pad("Senstivity", 14)+RNAFoldingTools.pad(sensExpStat+"", 6)+" "+RNAFoldingTools.pad(sensExpPPfold+"", 6)); //System.out.println(RNAFoldingTools.pad("PPV", 14)+RNAFoldingTools.pad(ppvExpStat+"", 6)+" "+RNAFoldingTools.pad(ppvExpPPfold+"", 6)); //System.out.println(RNAFoldingTools.pad("F-score", 14)+RNAFoldingTools.pad(fscExpStat+"", 6)+" "+RNAFoldingTools.pad(fscExpPPfold+"", 6)); System.out.print("StatAl: "); printValues(pairedSitesExperimental, pairedSitesStatAlign); System.out.print("PPfold: "); printValues(pairedSitesExperimental, pairedSitesPPfold); System.out.println("---------------------------------------------------------------------"); System.out.println(); ArrayList<Double> ppfoldFscValues = new ArrayList<Double>(); ArrayList<Double> rnaAlifoldFscValues = new ArrayList<Double>(); for(int k= 0 ; k < dataset.pairedSitesProjectedSamples.size() ; k++) { String val = "" + getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesProjectedSamples.get(k))); ppfoldFscValues.add(new Double(val)); } for(int k= 0 ; k < dataset.pairedSitesProjectedRnaAlifoldSamples.size() ; k++) { //System.out.println(dataset.pairedSitesProjectedSamples.size()+"\t"+dataset.pairedSitesProjectedRnaAlifoldSamples.size()); rnaAlifoldFscValues.add(getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesProjectedRnaAlifoldSamples.get(k)))); } double fscSampleMean = mean(ppfoldFscValues); double fscSampleMedian = 
getMedian(ppfoldFscValues); double fscRnaAlifoldSampleMean = mean(rnaAlifoldFscValues); double fscRnaAlifoldSampleMedian = getMedian(rnaAlifoldFscValues); ArrayList<Double> ppfoldSenValues = new ArrayList<Double>(); ArrayList<Double> rnaAlifoldSenValues = new ArrayList<Double>(); for(int k= 0 ; k < dataset.pairedSitesProjectedSamples.size() ; k++) { String val = "" + getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, dataset.pairedSitesProjectedSamples.get(k))); ppfoldSenValues.add(new Double(val)); } for(int k= 0 ; k < dataset.pairedSitesProjectedRnaAlifoldSamples.size() ; k++) { //System.out.println(dataset.pairedSitesProjectedSamples.size()+"\t"+dataset.pairedSitesProjectedRnaAlifoldSamples.size()); rnaAlifoldSenValues.add(getDouble(Benchmarks.calculateSensitivity(pairedSitesExperimental, dataset.pairedSitesProjectedRnaAlifoldSamples.get(k)))); } double senSampleMean = mean(ppfoldSenValues); double senSampleMedian = getMedian(ppfoldSenValues); double senRnaAlifoldSampleMean = mean(rnaAlifoldSenValues); double senRnaAlifoldSampleMedian = getMedian(rnaAlifoldSenValues); ArrayList<Double> ppfoldPpvValues = new ArrayList<Double>(); ArrayList<Double> rnaAlifoldPpvValues = new ArrayList<Double>(); for(int k= 0 ; k < dataset.pairedSitesProjectedSamples.size() ; k++) { String val = "" + getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, dataset.pairedSitesProjectedSamples.get(k))); ppfoldPpvValues.add(new Double(val)); } for(int k= 0 ; k < dataset.pairedSitesProjectedRnaAlifoldSamples.size() ; k++) { //System.out.println(dataset.pairedSitesProjectedSamples.size()+"\t"+dataset.pairedSitesProjectedRnaAlifoldSamples.size()); rnaAlifoldPpvValues.add(getDouble(Benchmarks.calculatePPV(pairedSitesExperimental, dataset.pairedSitesProjectedRnaAlifoldSamples.get(k)))); } double ppvSampleMean = mean(ppfoldPpvValues); double ppvSampleMedian = getMedian(ppfoldPpvValues); double ppvRnaAlifoldSampleMean = mean(rnaAlifoldPpvValues); double 
ppvRnaAlifoldSampleMedian = getMedian(rnaAlifoldPpvValues); ArrayList<Double> entropySamples = new ArrayList<Double>(); ArrayList<Double> entropyPercSamples = new ArrayList<Double>(); ArrayList<Double> entropyMaxSamples = new ArrayList<Double>(); for(int k = 0 ; k < dataset.sampledStructures.size() ; k++) { ResultBundle sampleBundle = dataset.sampledStructures.get(k); entropySamples.add(sampleBundle.entropyVal); entropyPercSamples.add(sampleBundle.entropyPercOfMax); entropyMaxSamples.add(sampleBundle.entropyMax); } double entropySampleMean = mean(entropySamples); double entropyPercSampleMean = mean(entropyPercSamples); double entropyMaxSampleMean = mean(entropyMaxSamples); double averageLength = getAverageLength(dataset.inputAlignment.sequences); String row = name+"\t"+dataset.posteriorsAverage+"\t"+dataset.mpdVsInputSim+"\t" +averageLength+"\t" +fscSampleMean+"\t"+fscSampleMedian+"\t"+fscExpStat+"\t"+fscExpStatWeighted+"\t"+fscExpStatMPD+"\t"+fscExpPPfold+"\t"+fscSamplingExp+"\t"+fscSamplingObs +"\t"+dataset.ppfoldReliabilityScoreSamplingAndAveraging+"\t"+dataset.ppfoldReliabilityScoreSamplingAndAveragingWeighted+"\t"+dataset.ppfoldReliabilityMPD +"\t"+dataset.resultBundlePPfold.reliabilityScore+"\t"+dataset.resultBundleEntropyExp.reliabilityScore+"\t"+dataset.resultBundleEntropyObs.reliabilityScore +"\t"+dataset.pairsOnlyReliabilityScoreSamplingAndAveraging+"\t"+dataset.pairsOnlyReliabilityScoreSamplingAndAveragingWeighted+"\t"+dataset.pairsOnlyReliabilityMPD +"\t"+dataset.resultBundlePPfold.pairsOnlyReliabilityScore+"\t"+dataset.resultBundleEntropyExp.pairsOnlyReliabilityScore+"\t"+dataset.resultBundleEntropyObs.pairsOnlyReliabilityScore +"\t"+dataset.resultBundleEntropyExp.entropyVal+"\t"+(dataset.resultBundleEntropyExp.entropyPercOfMax/100)+"\t"+dataset.resultBundleEntropyExp.entropyMax +"\t"+dataset.resultBundleEntropyObs.entropyVal+"\t"+(dataset.resultBundleEntropyObs.entropyPercOfMax/100)+"\t"+dataset.resultBundleEntropyObs.entropyMax 
+"\t"+dataset.resultBundleMPD.entropyVal+"\t"+(dataset.resultBundleMPD.entropyPercOfMax/100)+"\t"+dataset.resultBundleMPD.entropyMax +"\t"+dataset.resultBundlePPfold.entropyVal+"\t"+(dataset.resultBundlePPfold.entropyPercOfMax/100)+"\t"+dataset.resultBundlePPfold.entropyMax +"\t"+entropySampleMean+"\t"+(entropyPercSampleMean/100)+"\t"+entropyMaxSampleMean+"\t" +fscRnaAlifoldSampleMean+"\t"+fscRnaAlifoldSampleMedian+"\t"+fscRNAalifold+"\t"+fscRNAalifoldMPD+"\t"+fscRNAalifoldRef +"\t"+dataset.pairsOnlyReliabilityScoreSamplingAndAveragingPosteriorWeighted+"\t"+dataset.pairsOnlyMPDPosteriorWeighted+"\t"+dataset.pairsOnlyReliabilityEntropyObsPosteriorWeighted +"\t"+fscCombined+"\t"+dataset.pairsOnlyReliabilityScoreCombined+"\t"+dataset.ppfoldReliabilityScoreCombined +"\t"+senSampleMean+"\t"+senSampleMedian+"\t"+senExpStat+"\t"+senExpStatWeighted+"\t"+senExpStatMPD+"\t"+senExpPPfold+"\t"+senSamplingExp+"\t"+senSamplingObs +"\t"+ppvSampleMean+"\t"+ppvSampleMedian+"\t"+ppvExpStat+"\t"+ppvExpStatWeighted+"\t"+ppvExpStatMPD+"\t"+ppvExpPPfold+"\t"+ppvSamplingExp+"\t"+ppvSamplingObs +"\t"+senRnaAlifoldSampleMean+"\t"+senRnaAlifoldSampleMedian+"\t"+senRNAalifold+"\t"+senRNAalifoldMPD+"\t"+senRNAalifoldRef +"\t"+ppvRnaAlifoldSampleMean+"\t"+ppvRnaAlifoldSampleMedian+"\t"+ppvRNAalifold+"\t"+ppvRNAalifoldMPD+"\t"+ppvRNAalifoldRef +"\t"+fscMatrixFold; System.out.println("FSC " + fscCombined); ArrayList<Double> fuzzyDistances = new ArrayList<Double>(); for(int k = 1 ; k < dataset.cumulativeFuzzyAlignment.size() ; k++) { fuzzyDistances.add(FuzzyAlignment.distance(dataset.cumulativeFuzzyAlignment.get(k-1), dataset.cumulativeFuzzyAlignment.get(k))); } //System.out.println(dataset.title+"\t"+fuzzyDistances); /*try { BufferedWriter buffer = new BufferedWriter(new FileWriter("distances/"+dataset.title+"_fuzzy_")); for(int k = 0 ; k < fuzzyDistances.size() ; k++) { buffer.write(fuzzyDistances.get(k)+"\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); }*/ 
RNAFoldingTools.writeToFile(outFile, row, true); //System.out.println(header); //System.out.println(row); try { System.out.println("Writing"+resultsDir+name+".hist2"); BufferedWriter buffer = new BufferedWriter(new FileWriter(resultsDir+name+".hist2")); buffer.write("ST="+fscExpStat+"\n"); buffer.write("STW="+fscExpStatWeighted+"\n"); buffer.write("MPD="+fscExpStatMPD+"\n"); buffer.write("PP="+fscExpPPfold+"\n"); buffer.write("STE="+fscSamplingObs+"\n"); for(int l = 0 ; l < ppfoldFscValues.size() ; l++) { double val =ppfoldFscValues.get(l); if(Double.isNaN(val)) { val = 0; } buffer.write(val+"\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } } catch(Exception ex) { ex.printStackTrace(); } try { File entropyFile = new File("/home/michael/Dropbox/RNA and StatAlign/Report/Entropy3/"+name+".txt"); BufferedWriter buffer = new BufferedWriter(new FileWriter(entropyFile)); buffer.write("no\tobs_val\tobs_perc\tobs_max\texp_val\texp_perc\texp_max\tsam_val\tsam_perc\tsam_max\tmatrix_val\tmatrix_perc\tmatrix_max\n"); for(int l = 0 ; l < dataset.sampledStructures.size() ; l++) { ResultBundle sample = dataset.sampledStructures.get(l); ResultBundle obsSample = dataset.cumulativeFuzzyObsResults.get(l); ResultBundle expSample = dataset.cumulativeFuzzyExpResults.get(l); ResultBundle matrixFold = dataset.matrixFolds.get(l); buffer.write(l+"\t"+obsSample.entropyVal+"\t"+obsSample.entropyPercOfMax+"\t"+obsSample.entropyMax+"\t"); buffer.write(expSample.entropyVal+"\t"+expSample.entropyPercOfMax+"\t"+expSample.entropyMax+"\t"); buffer.write(sample.entropyVal+"\t"+sample.entropyPercOfMax+"\t"+sample.entropyMax+"\t"); buffer.write(matrixFold.entropyVal+"\t"+matrixFold.entropyPercOfMax+"\t"+matrixFold.entropyMax); buffer.newLine(); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } } } public static double getAverageLength(ArrayList<String> sequences) { double sum = 0; for(int i = 0 ; i < sequences.size() ; i++) { sum += 
sequences.get(i).replaceAll("-", "").length(); } return sum / ((double)sequences.size()); } public static void testVariation2() { String dir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/"; String resultsDir = "/home/michael/workspace/StatAlignExecute/output4/"; //File outFile = new File("Benchmarks.txt"); String suffix = "_5seqs"; if(!suffix.equals("_5seqs")) { dir = "/home/michael/Dropbox/RNA and StatAlign/Distance/Datasets2/"; } String header = "dataset\tposterior_avg\tsim_mpd_ref\t" +"fsc_sample_mean\tfsc_stat\tfsc_stat_weighted\tfsc_mpd\tfsc_ppfold\tfsc_entropy_exp\tfsc_entropy_obs\t" +"rel_stat\trel_stat_weighted\trel_mpd\trel_ppfold\trel_entropy_exp\trel_entropy_obs\t" +"rel2_stat\trel2_stat_weighted\trel2_mpd\trel2_ppfold\trel2_entropy_exp\trel2_entropy_obs\t" +"entropy_exp\tentropy_perc_exp\tentropy_max_exp\t" +"entropy_obs\tentropy_perc_obs\tentropy_max_obs\t" +"entropy_mpd\tentropy_perc_mpd\tentropy_max_mpd\t" +"entropy_ppfold\tentropy_perc_ppfold\tentropy_max_ppfold\t" +"entropy_sample_mean\tentropy_perc_sample_mean\tentropy_max_sample_mean\t"; //RNAFoldingTools.writeToFile(outFile, header, false); File [] files = new File(resultsDir).listFiles(); HashSet<String> usedDatasets = new HashSet<String>(); for(int z = 0 ; z < files.length ; z++) { String datasetName = files[z].getName().replaceAll("_seed.*", ""); if(!files[z].getName().contains(suffix) || !files[z].getName().endsWith(".serialized") || usedDatasets.contains(datasetName)) { continue; } usedDatasets.add(datasetName); ArrayList<Double> samplesFscVector = new ArrayList<Double>(); ArrayList<Double> statalignFscVector = new ArrayList<Double>(); ArrayList<Double> mpdFscVector = new ArrayList<Double>(); ArrayList<Double> ppfoldFscVector = new ArrayList<Double>(); ArrayList<Double> entropyObsFscVector = new ArrayList<Double>(); ArrayList<Double> entropyExpFscVector = new ArrayList<Double>(); for(int i = 0 ; i < files.length ; i++) { if(!files[i].getName().contains(datasetName) || 
!files[i].getName().contains(suffix) || !files[i].getName().endsWith(".serialized")) { continue; } //System.out.println(files[i]); if(files[i].getName().equals("TestRNAData27_5seqs.dat.fas_seed682981838.serialized")) { continue; } System.out.println(files[i]); Dataset dataset = Dataset.loadDatasetResult(files[i]); //String resultsDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/5seq2/"; //String dir = "/home/michael/Dropbox/RNA and StatAlign/Distance/Datasets2/"; //System.out.println(dataset.title); String name = dataset.title.replaceAll("_seed.+", ""); String smallname = name.substring(0, name.length()-8); //System.out.println(name); //String truncname = smallname; String truncname = smallname.replaceAll(suffix, ""); //File files = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/").listFiles(); File experimentalFile = new File(dir+truncname+".dat"); //File ourData = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNADATA1OURS"); //File statalignResultFile = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNAData1.dat.txt"); File ppfoldData = new File(dir+truncname+".dat.ct"); if(!suffix.equals("_5seqs")) { experimentalFile = new File(dir+truncname+suffix+".dat"); ppfoldData = new File(dir+truncname+suffix+".dat.ct"); } ExperimentalData experimentalData = Benchmarks.loadExperimentalStructure(experimentalFile); //StatAlignResult statalignResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res")); //StatAlignResult statalignWeightedResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res.weighted")); //System.out.println(name); //StatAlignResult mpdResult = loadStatAlignResultFile(new File(resultsDir+"/"+name+".dat.res.mpd")); String mappingSeq = ""; for(int j = 0 ;j < experimentalData.sequences.size() ; j++) { if(experimentalData.sequences.get(j).replaceAll("-", "").equals(dataset.pairedSitesRefSeq.replaceAll("-", ""))) { mappingSeq = 
experimentalData.sequences.get(j); } } //System.out.println(mappingSeq); //System.out.println(statalignResult.sequence); int [] pairedSitesExperimental = projectPairedSites(mappingSeq, experimentalData.pairedSites); int [] pairedSitesStatAlign = dataset.pairedSites; int [] pairedSitesStatAlignWeighted = dataset.pairedSitesWeighted; int [] pairedSitesPPfold = projectPairedSites(mappingSeq, RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); int [] pairedSitesMPD = dataset.pairedSitesMPD; double sensExpStat = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesStatAlign); double sensExpPPfold = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesPPfold); double ppvExpStat = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesStatAlign); double ppvExpPPfold = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesPPfold); double fscExpStat = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlign)); double fscExpPPfold = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesPPfold)); double fscExpStatWeighted = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlignWeighted)); double fscExpStatMPD= getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesMPD)); double fscSamplingExp = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesEntropyExp)); double fscSamplingObs = getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesEntropyObs)); ArrayList<Double> values = new ArrayList<Double>(); for(int k= 0 ; k < dataset.pairedSitesProjectedSamples.size() ; k++) { String val = "" + getDouble(Benchmarks.calculateFScore(pairedSitesExperimental, dataset.pairedSitesProjectedSamples.get(k))); values.add(new Double(val)); } double fscSampleMean = mean(values); samplesFscVector.addAll(values); statalignFscVector.add(fscExpStat); mpdFscVector.add(fscExpStatMPD); 
ppfoldFscVector.add(fscExpPPfold); entropyObsFscVector.add(fscSamplingObs); entropyExpFscVector.add(fscSamplingExp); ArrayList<Double> entropySamples = new ArrayList<Double>(); ArrayList<Double> entropyPercSamples = new ArrayList<Double>(); ArrayList<Double> entropyMaxSamples = new ArrayList<Double>(); for(int k = 0 ; k < dataset.sampledStructures.size() ; k++) { ResultBundle sampleBundle = dataset.sampledStructures.get(k); entropySamples.add(sampleBundle.entropyVal); entropyPercSamples.add(sampleBundle.entropyPercOfMax); entropyMaxSamples.add(sampleBundle.entropyMax); } double entropySampleMean = mean(entropySamples); double entropyPercSampleMean = mean(entropyPercSamples); double entropyMaxSampleMean = mean(entropyMaxSamples); String row = name+"\t"+dataset.posteriorsAverage+"\t"+dataset.mpdVsInputSim+"\t" +fscSampleMean+"\t"+fscExpStat+"\t"+fscExpStatWeighted+"\t"+fscExpStatMPD+"\t"+fscExpPPfold+"\t"+fscSamplingExp+"\t"+fscSamplingObs +"\t"+dataset.ppfoldReliabilityScoreSamplingAndAveraging+"\t"+dataset.ppfoldReliabilityScoreSamplingAndAveragingWeighted+"\t"+dataset.ppfoldReliabilityMPD +"\t"+dataset.resultBundlePPfold.reliabilityScore+"\t"+dataset.resultBundleEntropyExp.reliabilityScore+"\t"+dataset.resultBundleEntropyObs.reliabilityScore +"\t"+dataset.pairsOnlyReliabilityScoreSamplingAndAveraging+"\t"+dataset.pairsOnlyReliabilityScoreSamplingAndAveragingWeighted+"\t"+dataset.pairsOnlyReliabilityMPD +"\t"+dataset.resultBundlePPfold.pairsOnlyReliabilityScore+"\t"+dataset.resultBundleEntropyExp.pairsOnlyReliabilityScore+"\t"+dataset.resultBundleEntropyObs.pairsOnlyReliabilityScore +"\t"+dataset.resultBundleEntropyExp.entropyVal+"\t"+(dataset.resultBundleEntropyExp.entropyPercOfMax/100)+"\t"+dataset.resultBundleEntropyExp.entropyMax +"\t"+dataset.resultBundleEntropyObs.entropyVal+"\t"+(dataset.resultBundleEntropyObs.entropyPercOfMax/100)+"\t"+dataset.resultBundleEntropyObs.entropyMax 
+"\t"+dataset.resultBundleMPD.entropyVal+"\t"+(dataset.resultBundleMPD.entropyPercOfMax/100)+"\t"+dataset.resultBundleMPD.entropyMax +"\t"+dataset.resultBundlePPfold.entropyVal+"\t"+(dataset.resultBundlePPfold.entropyPercOfMax/100)+"\t"+dataset.resultBundlePPfold.entropyMax +"\t"+entropySampleMean+"\t"+(entropyPercSampleMean/100)+"\t"+entropyMaxSampleMean; /* ArrayList<Double> fuzzyDistances = new ArrayList<Double>(); for(int k = 1 ; k < dataset.cumulativeFuzzyAlignment.size() ; k++) { fuzzyDistances.add(FuzzyAlignment.distance(dataset.cumulativeFuzzyAlignment.get(k-1), dataset.cumulativeFuzzyAlignment.get(k))); } //System.out.println(dataset.title+"\t"+fuzzyDistances); try { BufferedWriter buffer = new BufferedWriter(new FileWriter("distances/"+dataset.title+"_fuzzy_")); for(int k = 0 ; k < fuzzyDistances.size() ; k++) { buffer.write(fuzzyDistances.get(k)+"\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); }*/ } DecimalFormat df = new DecimalFormat("0.000"); //System.out.println(); String vdir = "/home/michael/Dropbox/RNA and StatAlign/Report/V2/"; //System.out.println("Dataset\t\t\t\t\t#\tsample\tstat\tmpd\tppfold\ten_exp\ten_obs"); RNAFoldingTools.writeToFile(new File(vdir+"mean.txt"), datasetName+"\tmean\t\t"+mpdFscVector.size()+"\t"+df.format(mean(samplesFscVector))+"\t"+df.format(mean(statalignFscVector))+"\t"+df.format(mean(mpdFscVector))+"\t"+df.format(mean(ppfoldFscVector))+"\t"+df.format(mean(entropyExpFscVector))+"\t"+df.format(mean(entropyObsFscVector)), true); //System.out.println(datasetName+"\tmean\t\t"+mpdFscVector.size()+"\t"+df.format(mean(samplesFscVector))+"\t"+df.format(mean(statalignFscVector))+"\t"+df.format(mean(mpdFscVector))+"\t"+df.format(mean(ppfoldFscVector))+"\t"+df.format(mean(entropyExpFscVector))+"\t"+df.format(mean(entropyObsFscVector))); RNAFoldingTools.writeToFile(new File(vdir+"stdev.txt"), 
datasetName+"\tstdev\t\t"+mpdFscVector.size()+"\t"+df.format(stdev(samplesFscVector))+"\t"+df.format(stdev(statalignFscVector))+"\t"+df.format(stdev(mpdFscVector))+"\t"+df.format(stdev(ppfoldFscVector))+"\t"+df.format(stdev(entropyExpFscVector))+"\t"+df.format(stdev(entropyObsFscVector)), true); //System.out.println(datasetName+"\tstdev\t\t"+mpdFscVector.size()+"\t"+df.format(stdev(samplesFscVector))+"\t"+df.format(stdev(statalignFscVector))+"\t"+df.format(stdev(mpdFscVector))+"\t"+df.format(stdev(ppfoldFscVector))+"\t"+df.format(stdev(entropyExpFscVector))+"\t"+df.format(stdev(entropyObsFscVector))); double perc = 0.25; RNAFoldingTools.writeToFile(new File(vdir+"25th.txt"), datasetName+"\t25th\t\t"+mpdFscVector.size()+"\t"+df.format(getValue(samplesFscVector, perc))+"\t"+df.format(getValue(statalignFscVector, perc))+"\t"+df.format(getValue(mpdFscVector, perc))+"\t"+df.format(getValue(ppfoldFscVector, perc))+"\t"+df.format(getValue(entropyExpFscVector, perc))+"\t"+df.format(getValue(entropyObsFscVector, perc)), true); //System.out.println(datasetName+"\t25th\t\t"+mpdFscVector.size()+"\t"+df.format(getValue(samplesFscVector, perc))+"\t"+df.format(getValue(statalignFscVector, perc))+"\t"+df.format(getValue(mpdFscVector, perc))+"\t"+df.format(getValue(ppfoldFscVector, perc))+"\t"+df.format(getValue(entropyExpFscVector, perc))+"\t"+df.format(getValue(entropyObsFscVector, perc))); perc = 0.5; RNAFoldingTools.writeToFile(new File(vdir+"50th.txt"), datasetName+"\tmedian\t\t"+mpdFscVector.size()+"\t"+df.format(getValue(samplesFscVector, perc))+"\t"+df.format(getValue(statalignFscVector, perc))+"\t"+df.format(getValue(mpdFscVector, perc))+"\t"+df.format(getValue(ppfoldFscVector, perc))+"\t"+df.format(getValue(entropyExpFscVector, perc))+"\t"+df.format(getValue(entropyObsFscVector, perc)), true); //System.out.println(datasetName+"\tmedian\t\t"+mpdFscVector.size()+"\t"+df.format(getValue(samplesFscVector, perc))+"\t"+df.format(getValue(statalignFscVector, 
perc))+"\t"+df.format(getValue(mpdFscVector, perc))+"\t"+df.format(getValue(ppfoldFscVector, perc))+"\t"+df.format(getValue(entropyExpFscVector, perc))+"\t"+df.format(getValue(entropyObsFscVector, perc))); perc = 0.75; RNAFoldingTools.writeToFile(new File(vdir+"75th.txt"),datasetName+"\t75th\t\t"+mpdFscVector.size()+"\t"+df.format(getValue(samplesFscVector, perc))+"\t"+df.format(getValue(statalignFscVector, perc))+"\t"+df.format(getValue(mpdFscVector, perc))+"\t"+df.format(getValue(ppfoldFscVector, perc))+"\t"+df.format(getValue(entropyExpFscVector, perc))+"\t"+df.format(getValue(entropyObsFscVector, perc)), true); //System.out.println(datasetName+"\t75th\t\t"+mpdFscVector.size()+"\t"+df.format(getValue(samplesFscVector, perc))+"\t"+df.format(getValue(statalignFscVector, perc))+"\t"+df.format(getValue(mpdFscVector, perc))+"\t"+df.format(getValue(ppfoldFscVector, perc))+"\t"+df.format(getValue(entropyExpFscVector, perc))+"\t"+df.format(getValue(entropyObsFscVector, perc))); RNAFoldingTools.writeToFile(new File(vdir+"IQR.txt"),datasetName+"\tIQR\t\t"+mpdFscVector.size()+"\t"+df.format(IQR(samplesFscVector))+"\t"+df.format(IQR(statalignFscVector))+"\t"+df.format(IQR(mpdFscVector))+"\t"+df.format(IQR(ppfoldFscVector))+"\t"+df.format(IQR(entropyExpFscVector))+"\t"+df.format(IQR(entropyObsFscVector)), true); System.out.println(datasetName+"\tIQR\t\t"+mpdFscVector.size()+"\t"+df.format(IQR(samplesFscVector))+"\t"+df.format(IQR(statalignFscVector))+"\t"+df.format(IQR(mpdFscVector))+"\t"+df.format(IQR(ppfoldFscVector))+"\t"+df.format(IQR(entropyExpFscVector))+"\t"+df.format(IQR(entropyObsFscVector))); double sampleMean = mean(samplesFscVector); RNAFoldingTools.writeToFile(new File(vdir+"percent_greater_than_mean.txt"),datasetName+"\t%>mean\t\t"+mpdFscVector.size()+"\t"+df.format(percentGreaterOrEqualTo(sampleMean, samplesFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMean, 
statalignFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMean,mpdFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMean,ppfoldFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMean,entropyExpFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMean,entropyObsFscVector)), true); double sampleMedian = getValue(samplesFscVector, 0.5); RNAFoldingTools.writeToFile(new File(vdir+"percent_greater_than_median.txt"),datasetName+"\t%>median\t\t"+mpdFscVector.size()+"\t"+df.format(percentGreaterOrEqualTo(sampleMedian, samplesFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMedian, statalignFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMedian,mpdFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMedian,ppfoldFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMedian,entropyExpFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMedian,entropyObsFscVector)), true); try { BufferedWriter buffer = new BufferedWriter(new FileWriter(System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/Report/Variation/"+datasetName+".var")); buffer.write("Samples\tStatAlign\tMPD\tPPfold\tEntropy obs\n"); for(int k = 0 ; k < statalignFscVector.size() ; k++) { buffer.write(samplesFscVector.get(k)+"\t"+statalignFscVector.get(k)+"\t"+mpdFscVector.get(k)+"\t"+ppfoldFscVector.get(k)+"\t"+entropyObsFscVector.get(k)); buffer.newLine(); } for(int k = statalignFscVector.size() ; k < samplesFscVector.size() ; k++) { buffer.write(samplesFscVector.get(k)+"\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } } } public static void testVariation() { String dir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/Distance/Datasets2/"; //String dir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/"; //String resultsDir = "/home/michael/Dropbox/RNA and StatAlign/TestRNAData/Results3/"; String resultsDir = "/home/michael/workspace/StatAlignExecute/output/"; File [] files = new File(resultsDir).listFiles(); 
HashSet<String> used = new HashSet<String>(); for(int i = 0 ; i < files.length ; i++) { String fullName = files[i].getName(); if(fullName.endsWith(".dat.res")) { String name = fullName.substring(0, fullName.length()-8); String smallname = fullName.substring(0, fullName.length()-16); //System.out.println(fullName); //System.out.println(name); //System.out.println(smallname); String originalName = name.replaceAll("_seed.+", ""); originalName = originalName.substring(0, originalName.length()-8); //System.out.println("O:"+originalName); File experimentalFile = new File(dir+originalName+".dat"); File ppfoldData = new File(dir+originalName+".dat.ct"); ExperimentalData experimentalData = Benchmarks.loadExperimentalStructure(experimentalFile); if(used.contains(originalName)) { continue; } used.add(originalName); int runs = 0; ArrayList<Double> samplesFscVector = new ArrayList<Double>(); ArrayList<Double> statalignFscVector = new ArrayList<Double>(); ArrayList<Double> mpdFscVector = new ArrayList<Double>(); ArrayList<Double> ppfoldFscVector = new ArrayList<Double>(); ArrayList<Double> entropyObsFscVector = new ArrayList<Double>(); ArrayList<Double> entropyExpFscVector = new ArrayList<Double>(); for(int l = 0; l < files.length ; l++) { if(files[l].getName().startsWith(originalName) && files[l].getName().endsWith(".dat.res")) { String runName = files[l].getName().substring(0, files[l].getName().length()-8); runs++; StatAlignResult statalignResult = loadStatAlignResultFile(new File(resultsDir+"/"+runName+".dat.res")); StatAlignResult statalignWeightedResult = loadStatAlignResultFile(new File(resultsDir+"/"+runName+".dat.res.weighted")); if(!new File(resultsDir+"/"+runName+".dat.res.mpd").exists()) { //System.err.println(new File(resultsDir+"/"+runName+".dat.res.mpd")+" is missing"); continue; } StatAlignResult mpdResult = loadStatAlignResultFile(new File(resultsDir+"/"+runName+".dat.res.mpd")); String mappingSeq = ""; for(int j = 0 ;j < experimentalData.sequences.size() ; j++) 
{ if(experimentalData.sequences.get(j).replaceAll("-", "").equals(statalignResult.sequence.replaceAll("-", ""))) { mappingSeq = experimentalData.sequences.get(j); } } int [] pairedSitesExperimental = projectPairedSites(mappingSeq, experimentalData.pairedSites); int [] pairedSitesStatAlign = statalignResult.pairedSites; int [] pairedSitesStatAlignWeighted = statalignWeightedResult.pairedSites; int [] pairedSitesPPfold = projectPairedSites(mappingSeq, RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); int [] pairedSitesMPD = mpdResult.pairedSites; Scores statalignScores = Scores.getScores(pairedSitesExperimental, pairedSitesStatAlign); Scores statalignWeightedScores = Scores.getScores(pairedSitesExperimental, pairedSitesStatAlignWeighted); Scores ppfoldScores = Scores.getScores(pairedSitesExperimental, pairedSitesPPfold); Scores mpdScores = Scores.getScores(pairedSitesExperimental, pairedSitesMPD); double fscSamplingObs = -1; double fscSamplingExp = -1; File obsFile = new File(resultsDir+runName+".folds_e_obs"); if(obsFile.exists()) { String dbn = PPFold.loadFolds(obsFile, 4).get(0); fscSamplingObs = Benchmarks.calculateFScore(pairedSitesExperimental, RNAFoldingTools.getPairedSitesFromDotBracketString(dbn)); if(Double.isNaN(fscSamplingObs)) { fscSamplingObs = 0; } //System.out.println("Alignment sampling obs FSC"+fscSamplingObs); dbn = PPFold.loadFolds(new File(resultsDir+runName+".folds_e_exp"), 4).get(0); fscSamplingExp = Benchmarks.calculateFScore(pairedSitesExperimental, RNAFoldingTools.getPairedSitesFromDotBracketString(dbn)); if(Double.isNaN(fscSamplingExp)) { fscSamplingExp = 0; } //System.out.println("Alignment sampling exp FSC"+fscSamplingExp); } if(new File(resultsDir+runName+".folds").exists() && fscSamplingObs != -1) { ArrayList<String> structures = PPFold.loadFolds(new File(resultsDir+runName+".folds"), 4); ArrayList<Double> sampleValues = new ArrayList<Double>(); for(int k= 0 ; k < structures.size() ; k++) { String val = "" + 
Benchmarks.calculateFScore(pairedSitesExperimental, RNAFoldingTools.getPairedSitesFromDotBracketString(structures.get(k))); Double d = new Double(val); if(d.isNaN()) { d = new Double(0); } sampleValues.add(d); } samplesFscVector.addAll(sampleValues); statalignFscVector.add(statalignScores.fsc); ppfoldFscVector.add(ppfoldScores.fsc); mpdFscVector.add(mpdScores.fsc); entropyObsFscVector.add(fscSamplingObs); entropyExpFscVector.add(fscSamplingExp); //System.out.println(runName+"\t"+runs+"\t"+mean(values)+"\t"+statalignScores.fsc+"\t"+mpdScores.fsc+"\t"+ppfoldScores.fsc+"\t"+fscSamplingObs); /*try { BufferedWriter buffer = new BufferedWriter(new FileWriter(resultsDir+name+".hist2")); buffer.write("ST="+statalignScores.fsc+"\n"); buffer.write("STW="+statalignWeightedScores.fsc+"\n"); buffer.write("MPD="+mpdScores.fsc+"\n"); buffer.write("PP="+ppfoldScores.fsc+"\n"); buffer.write("STE="+fscSamplingObs+"\n"); for(int l = 0 ; l < values.size() ; l++) { double val = Double.parseDouble(values.get(l)); if(Double.isNaN(val)) { val = 0; } buffer.write(val+"\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); }*/ } } } DecimalFormat df = new DecimalFormat("0.000"); //System.out.println(); System.out.println("Dataset\t\t\t\t\t#\tsample\tstat\tmpd\tppfold\ten_exp\ten_obs"); System.out.println(originalName+"\tmean\t\t"+runs+"\t"+df.format(mean(samplesFscVector))+"\t"+df.format(mean(statalignFscVector))+"\t"+df.format(mean(mpdFscVector))+"\t"+df.format(mean(ppfoldFscVector))+"\t"+df.format(mean(entropyExpFscVector))+"\t"+df.format(mean(entropyObsFscVector))); System.out.println(originalName+"\tstdev\t\t"+runs+"\t"+df.format(stdev(samplesFscVector))+"\t"+df.format(stdev(statalignFscVector))+"\t"+df.format(stdev(mpdFscVector))+"\t"+df.format(stdev(ppfoldFscVector))+"\t"+df.format(stdev(entropyExpFscVector))+"\t"+df.format(stdev(entropyObsFscVector))); double perc = 0.25; 
System.out.println(originalName+"\tstdev\t\t"+runs+"\t"+df.format(stdev(samplesFscVector))+"\t"+df.format(stdev(statalignFscVector))+"\t"+df.format(stdev(mpdFscVector))+"\t"+df.format(stdev(ppfoldFscVector))+"\t"+df.format(stdev(entropyExpFscVector))+"\t"+df.format(stdev(entropyObsFscVector))); double sampleMean = mean(samplesFscVector); try { BufferedWriter buffer = new BufferedWriter(new FileWriter(System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/Report/Variation/"+originalName+".var")); buffer.write("Samples\tStatAlign\tMPD\tPPfold\tEntropy exp\tEntropy obs\n"); for(int k = 0 ; k < statalignFscVector.size() ; k++) { buffer.write(samplesFscVector.get(k)+"\t"+statalignFscVector.get(k)+"\t"+mpdFscVector.get(k)+"\t"+ppfoldFscVector.get(k)+"\t"+entropyExpFscVector.get(k)+"\t"+entropyObsFscVector.get(k)); buffer.newLine(); } for(int k = statalignFscVector.size() ; k < samplesFscVector.size() ; k++) { buffer.write(samplesFscVector.get(k)+"\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } //System.out.println(originalName+"\t% >= mean\t"+runs+"\t"+df.format(percentGreaterOrEqualTo(sampleMean, samplesFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMean, statalignFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMean, mpdFscVector))+"\t"+df.format(score(sampleMean, ppfoldFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMean, entropyExpFscVector))+"\t"+df.format(percentGreaterOrEqualTo(sampleMean, entropyObsFscVector))); //System.out.println(originalName+"\tscore\t\t"+runs+"\t"+df.format(score(sampleMean, samplesFscVector))+"\t"+df.format(score(sampleMean, statalignFscVector))+"\t"+df.format(score(sampleMean, mpdFscVector))+"\t"+df.format(score(sampleMean, ppfoldFscVector))+"\t"+df.format(score(sampleMean, entropyExpFscVector))+"\t"+df.format(score(sampleMean, entropyObsFscVector))); } } } public void performDistanceBenchmarks() { File distanceFile = new File(System.getProperty("user.home")+ "/Dropbox/RNA and 
StatAlign/static/9seq2/dist_scores.txt"); String dataDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/Distance/Datasets2/"; String resultsDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/9seq2/"; //File distanceFile = new File(System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/5seq/dist_scores.txt"); //String dataDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/TestRNAData/"; //String resultsDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/5seq/"; try { BufferedReader buffer = new BufferedReader(new FileReader(distanceFile)); String textline = null; String header = ""; while((textline = buffer.readLine()) != null) { if(textline.startsWith("Dataset")) { header = textline; //System.out.println(header); continue; } String [] split = textline.split("\t+"); String dataName = split[0]; String name = dataName.split("\\.")[0]; File experimentalFile = new File(dataDir+name+".dat"); File ppfoldData = new File(dataDir+name+".dat.ct"); ExperimentalData experimentalData = Benchmarks.loadExperimentalStructure(experimentalFile); StatAlignResult statalignResult = loadStatAlignResultFile(new File(resultsDir+"/"+dataName+".dat.res")); // StatAlignResult statalignWeightedResult = loadStatAlignResultFile(new File(resultsDir+"/"+dataName+".dat.res.weighted")); StatAlignResult mpdResult = loadStatAlignResultFile(new File(resultsDir+"/"+dataName+".dat.res.mpd")); String mappingSeq = ""; for(int j = 0 ;j < experimentalData.sequences.size() ; j++) { if(experimentalData.sequences.get(j).replaceAll("-", "").equals(statalignResult.sequence.replaceAll("-", ""))) { mappingSeq = experimentalData.sequences.get(j); } } int [] pairedSitesExperimental = projectPairedSites(mappingSeq, experimentalData.pairedSites); int [] pairedSitesStatAlign = statalignResult.pairedSites; int [] pairedSitesStatAlignWeighted = statalignWeightedResult.pairedSites; int [] pairedSitesPPfold = 
projectPairedSites(mappingSeq, RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); int [] pairedSitesMPD = mpdResult.pairedSites; double sensExpStat = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesStatAlign); double sensExpPPfold = Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesPPfold); double sensExpMPD=Benchmarks.calculateSensitivity(pairedSitesExperimental, pairedSitesMPD); double ppvExpStat = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesStatAlign); double ppvExpPPfold = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesPPfold); double ppvExpMPD = Benchmarks.calculatePPV(pairedSitesExperimental, pairedSitesMPD); double fscExpStat = Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlign); double fscExpPPfold = Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesPPfold); double fscExpStatWeighted =Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlignWeighted); double fscExpStatMPD=Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesMPD); System.out.println(textline+"\t"+fscExpStat+"\t"+fscExpStatMPD+"\t"+sensExpStat+"\t"+sensExpMPD+"\t"+ppvExpStat+"\t"+ppvExpMPD); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } } public void performEntropy() { File distanceFile = new File(System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/5seq2/dist_scores.txt"); //String dataDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/Distance/Datasets2/"; String resultsDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/5seq2/"; //File distanceFile = new File(System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/5seq/dist_scores.txt"); String dataDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/TestRNAData/"; //String resultsDir = System.getProperty("user.home")+ "/Dropbox/RNA and StatAlign/static/5seq/"; try { BufferedReader buffer = new 
BufferedReader(new FileReader(distanceFile)); String textline = null; String header = ""; while((textline = buffer.readLine()) != null) { if(textline.startsWith("Dataset")) { header = textline; //System.out.println(header); continue; } String [] split = textline.split("\t+"); String dataName = split[0]; String name = dataName.split("\\.")[0].split("_")[0]; File experimentalFile = new File(dataDir+name+".dat"); File ppfoldData = new File(dataDir+name+".dat.ct"); ExperimentalData experimentalData = Benchmarks.loadExperimentalStructure(experimentalFile); StatAlignResult statalignResult = loadStatAlignResultFile(new File(resultsDir+"/"+dataName+".dat.res")); // StatAlignResult statalignWeightedResult = loadStatAlignResultFile(new File(resultsDir+"/"+dataName+".dat.res.weighted")); StatAlignResult mpdResult = loadStatAlignResultFile(new File(resultsDir+"/"+dataName+".dat.res.mpd")); String mappingSeq = ""; for(int j = 0 ;j < experimentalData.sequences.size() ; j++) { if(experimentalData.sequences.get(j).replaceAll("-", "").equals(statalignResult.sequence.replaceAll("-", ""))) { mappingSeq = experimentalData.sequences.get(j); } } int [] pairedSitesExperimental = projectPairedSites(mappingSeq, experimentalData.pairedSites); int [] pairedSitesStatAlign = statalignResult.pairedSites; int [] pairedSitesStatAlignWeighted = statalignWeightedResult.pairedSites; int [] pairedSitesPPfold = projectPairedSites(mappingSeq, RNAFoldingTools.getPairedSitesFromCtFile(ppfoldData)); int [] pairedSitesMPD = mpdResult.pairedSites; Scores statalignScores = Scores.getScores(pairedSitesExperimental, pairedSitesStatAlign); Scores ppfoldScores = Scores.getScores(pairedSitesExperimental, pairedSitesPPfold); Scores mpdScores = Scores.getScores(pairedSitesExperimental, pairedSitesMPD); double fscExpStatWeighted =Benchmarks.calculateFScore(pairedSitesExperimental, pairedSitesStatAlignWeighted); EntropyData entropyDataExp = EntropyData.loadEntropyData(new 
File(resultsDir+dataName+"_entropy_fuzzy_exp.txt")); EntropyData entropyDataObs = EntropyData.loadEntropyData(new File(resultsDir+dataName+"_entropy_fuzzy_obs.txt")); EntropyData entropyDataSamples = EntropyData.loadEntropyData(new File(resultsDir+dataName+"_entropy_samples.txt")); double entropyDataExpLast = entropyDataExp.entropyVals.get(entropyDataExp.entropyVals.size()-1); double entropyDataObsLast = entropyDataObs.entropyVals.get(entropyDataExp.entropyVals.size()-1); double entropyDataObsPercLast = entropyDataObs.percentOfMax.get(entropyDataExp.percentOfMax.size()-1); double sampleMean = mean(entropyDataSamples.entropyVals); double sampleMeanPercentOfMax = mean(entropyDataSamples.percentOfMax); System.out.println(textline+"\t"+statalignScores.fsc+"\t"+mpdScores.fsc+"\t"+statalignScores.sen+"\t"+mpdScores.sen+"\t"+statalignScores.ppv+"\t"+mpdScores.ppv+"\t"+entropyDataObsLast+"\t"+sampleMean+"\t"+entropyDataObsPercLast+"\t"+sampleMeanPercentOfMax); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } } /** * A class for holding entropy data */ static class EntropyData { ArrayList<Double> sampleNo = new ArrayList<Double>(); ArrayList<Double> entropyVals = new ArrayList<Double>(); ArrayList<Double> percentOfMax = new ArrayList<Double>(); ArrayList<Double> maxVals = new ArrayList<Double>(); public static EntropyData loadEntropyData(File entropyFile) { EntropyData entropyData = new EntropyData(); try { BufferedReader buffer = new BufferedReader(new FileReader(entropyFile)); buffer.readLine(); String textline = null; while((textline = buffer.readLine()) != null) { String [] split = textline.split("(\t)+"); entropyData.sampleNo.add(new Double(split[0])); entropyData.entropyVals.add(new Double(split[1])); entropyData.percentOfMax.add(new Double(split[2])); entropyData.maxVals.add(new Double(split[3])); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } return entropyData; } } public static double percentGreaterOrEqualTo(double x, 
ArrayList<Double> values) { double count = 0; for(int i = 0 ; i < values.size() ; i++) { if(values.get(i) >= x) { count++; } } return count / ((double)values.size()); } public static double score(double x, ArrayList<Double> values) { double count = 0; for(int i = 0 ; i < values.size() ; i++) { count += Math.min(values.get(i)/x, 1); } return count / ((double)values.size()); } /** * Calculates the mean from a list values * @param values * @return */ public static double mean(ArrayList<Double> values) { double sum = 0; for(int i = 0 ; i < values.size() ; i++) { sum += values.get(i); } return sum / ((double) values.size()); } /** * Calculates the standard deviation of a list of values. * @param values * @return */ public static double stdev(ArrayList<Double> values) { double mean = mean(values); double stdev = 0; for(int i = 0 ; i < values.size() ; i++) { stdev += Math.pow(values.get(i) - mean, 2); } stdev /= ((double)values.size()-1); return Math.sqrt(stdev); } public static void printPairs(int [] pairedSites) { String ret = ""; for(int i = 0 ; i < pairedSites.length ; i++) { ret += (i+1) + "\t" + pairedSites[i]+"\n"; } System.out.println(ret); } /** * Takes an integer array of paired sites and deletes nucleotide positions corresponding to gaps in the aligned sequence. A site whose base-pairing partner is deleted becomes single-stranded, unless it was deleted as well. 
* @param alignedSequence * @param pairedSites * @return */ public static int [] projectPairedSites(String alignedSequence, int [] pairedSites) { int [] ungappedToGapped = Mapping.getUngappedToGappedMapping(alignedSequence); int [] gappedToUngapped = Mapping.getGappedToUngappedMapping(alignedSequence); int [] projectedPairedSites = new int[ungappedToGapped.length]; for(int i = 0 ; i < pairedSites.length ; i++) { if(pairedSites[i] != 0) // if paired, map { int x = gappedToUngapped[i]; if(x != -1); { int y = Math.max(0, gappedToUngapped[pairedSites[i]-1]) + 1; projectedPairedSites[x] = y; } } } return projectedPairedSites; } public static void saveAsFasta(ExperimentalData expData, File outFile) { saveAsFasta(expData.sequences, expData.sequenceNames, outFile); } /** * Save a list of sequences and sequence names as a FASTA file. * @param sequences * @param sequenceNames * @param outFile */ public static void saveAsFasta(ArrayList<String> sequences, ArrayList<String> sequenceNames, File outFile) { try { BufferedWriter buffer = new BufferedWriter(new FileWriter(outFile)); for(int i = 0 ; i < sequences.size() ; i++) { buffer.write(">"+sequenceNames.get(i)+"\n"); buffer.write(sequences.get(i)+"\n"); } buffer.close(); } catch(IOException ex) { ex.printStackTrace(); } } /** * Loads the experimentally-derived secondary structures and alignments. * @param realStructureFile the name of the file to load. * @return an ExperimentalData object representing the experimental structure and alignment. 
*/ public static ExperimentalData loadExperimentalStructure(File realStructureFile) { try { ExperimentalData expData = new ExperimentalData(); BufferedReader buffer = new BufferedReader(new FileReader(realStructureFile)); String realStructureDBS = buffer.readLine(); expData.pairedSites = RNAFoldingTools.getPairedSitesFromDotBracketString(realStructureDBS, '<', '>'); ArrayList<String> sequences = new ArrayList<String>(); ArrayList<String> sequenceNames = new ArrayList<String>(); String textline = null; String sequence = ""; while ((textline = buffer.readLine()) != null) { if (textline.startsWith(">")) { sequenceNames.add(textline.substring(1)); if (!sequence.equals("")) { sequences.add(sequence.toUpperCase()); sequence = ""; } } else { sequence += textline.trim(); } } buffer.close(); if (!sequence.equals("")) { sequences.add(sequence); } // replace .'s with -'s. for(int i = 0 ; i < sequences.size() ; i++) { sequences.set(i, sequences.get(i).replaceAll("\\.", "-")); } expData.sequences = sequences; expData.sequenceNames = sequenceNames; return expData; } catch(IOException ex) { ex.printStackTrace(); } return null; } /** * Given an array of paired sites corresponding to the real structure * and an array corrsponding to the predicted structure returns the sensitivity. 
* @param realPairedSites * @param predictedPairedSites * @return */ public static double calculateSensitivity (int [] realPairedSites, int [] predictedPairedSites) {/* The sensitivity for a predicted structure is the percentage of base pairs in the experimental structure that are also present in the predicted structure.*/ /*double totalRealBasePairs = 0; double correctlyPredicted = 0; for(int i = 0 ; i < realPairedSites.length ; i++) { if(realPairedSites[i] >= i + 1 && realPairedSites[i] != 0) { totalRealBasePairs++; if(realPairedSites[i] == predictedPairedSites[i]) { correctlyPredicted++; } } } //System.out.println(correctlyPredicted); //System.out.println(totalRealBasePairs); //return correctlyPredicted / totalRealBasePairs; double [] ret = getValues(realPairedSites, predictedPairedSites); return ret[0] / (ret[0]+ret[3]);*/ double count = 0; double total = 0; for(int i = 0 ; i < realPairedSites.length ; i++) { if(realPairedSites[i] != 0) { total++; if(realPairedSites[i] == predictedPairedSites[i]) { count++; } } } return count / total; } /** * Given an array of paired sites corresponding to the real structure * and an array corresponding to the predicted structure returns the PPV. 
* @param realPairedSites * @param predictedPairedSites * @return */ public static double calculatePPV (int [] realPairedSites, int [] predictedPairedSites) { /* The PPV is the percentage of base pairs in the predicted structure that are in the experimental structure.*/ /* double correctlyPredicted = 0; // true positives double incorrectlyPredicted = 0; // false positives for(int i = 0 ; i < realPairedSites.length ; i++) { if(realPairedSites[i] >= i + 1 && realPairedSites[i] != 0 && realPairedSites[i] == predictedPairedSites[i]) { correctlyPredicted++; } else if(predictedPairedSites[i] >= i + 1 && predictedPairedSites[i] != 0 && realPairedSites[i] != predictedPairedSites[i]) { incorrectlyPredicted++; } } //return correctlyPredicted / (correctlyPredicted + incorrectlyPredicted); double [] ret = getValues(realPairedSites, predictedPairedSites); return ret[0] / (ret[0]+ret[2]);*/ double count = 0; double total = 0; for(int i = 0 ; i < realPairedSites.length ; i++) { if(predictedPairedSites[i] != 0) { total++; if(predictedPairedSites[i] == realPairedSites[i]) { count++; } } } return count / total; } /** * Given an array of paired sites corresponding to the real structure * and an array corresponding to the predicted structure returns the F-score. 
* @param realPairedSites * @param predictedPairedSites * @return */ public static double calculateFScore (int [] realPairedSites, int [] predictedPairedSites) { double sensitivity = calculateSensitivity(realPairedSites, predictedPairedSites); double ppv = calculatePPV(realPairedSites, predictedPairedSites); return (2 * sensitivity * ppv)/(sensitivity+ppv); } public static double [] getValues(int [] realPairedSites, int [] predictedPairedSites) { double TP = 0; // true positives double TN = 0; double FP = 0; // false positives double FN = 0; for(int i = 0 ; i < realPairedSites.length ; i++) { if(i + 1 < realPairedSites[i] && realPairedSites[i] == predictedPairedSites[i]) { TP++; } else if(i + 1 < realPairedSites[i] && realPairedSites[i] != 0 && predictedPairedSites[i] != realPairedSites[i]) { FN++; } else if(predictedPairedSites[i] != 0 && realPairedSites[i] == 0) { FP++; } else if(realPairedSites[i] == 0 && predictedPairedSites[i] == 0) { TN++; } } double [] ret = {TP, TN, FP, FN}; return ret; } public static void printValues(int [] realPairedSites, int [] predictedPairedSites) { double [] ret = getValues(realPairedSites, predictedPairedSites); System.out.println("TP="+ret[0]+" TN="+ret[1]+" FP="+ret[2]+" FN="+ret[3]); } public static StatAlignResult loadStatAlignResultFile(File resultFile) { try { BufferedReader buffer = new BufferedReader(new FileReader(resultFile)); StatAlignResult result = new StatAlignResult(); result.sequence = buffer.readLine(); result.dbnStructure = buffer.readLine(); result.pairedSites = RNAFoldingTools.getPairedSitesFromDotBracketString(result.dbnStructure, '(', ')'); buffer.close(); return result; } catch(IOException ex) { ex.printStackTrace(); } return null; } static class Scores { int [] pairedExperimental; int [] pairedPredicted; double fsc; double sen; double ppv; public static Scores getScores(int [] pairedExperimental, int [] pairedPredicted) { Scores scores = new Scores(); scores.pairedExperimental = pairedExperimental; 
scores.pairedPredicted = pairedPredicted; scores.fsc = Benchmarks.calculateFScore(pairedExperimental, pairedPredicted); scores.sen = Benchmarks.calculateSensitivity(pairedExperimental, pairedPredicted); scores.ppv = Benchmarks.calculatePPV(pairedExperimental, pairedPredicted); if(Double.isNaN(scores.fsc)) { scores.fsc = 0; } if(Double.isNaN(scores.ppv)) { scores.ppv = 0; } if(Double.isNaN(scores.sen)) { scores.sen = 0; } return scores; } } /** * Calculates the Interquartile Range from a given list of values. * @param values * @return */ public static double IQR(ArrayList<Double> values) { return getValue(values, 0.75) - getValue(values, 0.25); } /** * Returns the median from a list of values * @param values * @return */ public static double getMedian(ArrayList<Double> values) { ArrayList<Double> sortedValues = (ArrayList<Double>) values.clone(); Collections.sort(sortedValues); double length = ((double)values.size()-1)/2; int floor = (int) Math.floor(length); int ceil = (int) Math.ceil(length); return (sortedValues.get(floor)+sortedValues.get(ceil))/2; } /** * Returns a value at a given percentile. * @param values * @param percentile * @return */ public static double getValue(ArrayList<Double> values, double percentile) { ArrayList<Double> sortedValues = (ArrayList<Double>) values.clone(); Collections.sort(sortedValues); int lower = (int)(percentile * ((double)sortedValues.size()-1)); int upper = (int)Math.ceil(percentile * ((double)sortedValues.size()-1)); return (sortedValues.get(lower)+sortedValues.get(upper))/2; } /** * Returns the percentile of the specified value in the specified list of values. 
* The result is the mid-rank percentile rank: the number of values below x
 * plus half the number of values equal to x, divided by the number of values.
 * It is 0 when x is below every value and 1 when x is above every value.
 * (The previous implementation had both comparisons inverted and returned
 * (1 + n) / 2n for every x inside the range of the values.)
 * @param values the reference values; the list is not modified
 * @param x the value to rank
 * @return the percentile rank of x as a fraction between 0 and 1; NaN for an empty list
 */
public static double percentile(ArrayList<Double> values, double x)
{
    int below = 0;
    int atOrBelow = 0;
    for(double value : values)
    {
        if(value < x)
        {
            below++;
        }
        if(value <= x)
        {
            atOrBelow++;
        }
    }

    // Averaging the two counts gives tied values half weight.
    double pos = (below + atOrBelow) / 2.0;
    return pos / ((double) values.size());
}
}