package statalign.postprocess.utils;
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
*
* @author Michael
*/
public class RNAFoldingTools {
public static void main(String[] args) {
ArrayList<String> sequences = new ArrayList<String>();
sequences.add("ACT--CC-");
sequences.add("ACTC-CCG");
sequences.add("ACTCTCCG");
System.out.println(RNAFoldingTools.getReferenceSequence(sequences, 9));
// Generate an example "Base pairing probability" matrix
String [] structures = {"(((..(..))))(..)",
"(((..(..))))(..)",
"(((..(..))))(..)"};
double[][] basePairCount = getBasePairCountMatrix(structures);
double[] singleBaseCount = getSingleBaseCount(structures);
// Perform the posterior decoding
RNAFoldingTools rnaTools = new RNAFoldingTools();
//int [] pairedSites = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(basePairCount); // fails for this example, because not base-pairing probability matrix
//int [] pairedSites = rnaTools.getPosteriorDecodingConsensusStructureMultiThreaded(basePairCount, singleBaseCount); // this works
int [] pairedSites = RNAFoldingTools.getPosteriorDecodingConsensusStructure(basePairCount, singleBaseCount);
// Print the list of paired sites for the posterior decoding structure
for(int i = 0 ; i < pairedSites.length ; i++)
{
System.out.println((i+1)+"\t"+pairedSites[i]);
}
// Print a dot bracket string representation of the posterior decoding structure
System.out.println("Dot bracket structure: " + getDotBracketStringFromPairedSites(pairedSites));
//System.out.println("Dot bracket structure (sanity check): " + getDotBracketStringFromPairedSites(getPairedSitesFromDotBracketString(getDotBracketStringFromPairedSites(pairedSites))));
}
/**
* General purpose class for holding a pair of integers.
*/
static class Pair {
int x;
int y;
public Pair(int x, int y) {
this.x = x;
this.y = y;
}
@Override
public String toString() {
return "(" + x + ", " + y + ")";
}
}
public static final double emptyValue = Double.MAX_VALUE;
public static double [] getSingleBaseProb(double[][] basePairProb)
{
double [] singleBaseProb = new double[basePairProb.length];
for(int i = 0 ; i < basePairProb.length ; i++)
{
singleBaseProb[i] = 1;
for(int j = 0 ; j < basePairProb[0].length ; j++)
{
singleBaseProb[i] -= basePairProb[i][j];
}
}
return singleBaseProb;
}
public static int [] getPosteriorDecodingConsensusStructure(float[][] basePairProb) {
return getPosteriorDecodingConsensusStructure(getDoubleMatrix(basePairProb));
}
public static int [] getPosteriorDecodingConsensusStructure(double[][] basePairProb) {
double [] singleBaseProb = new double[basePairProb.length];
for(int i = 0 ; i < basePairProb.length ; i++)
{
singleBaseProb[i] = 1;
for(int j = 0 ; j < basePairProb[0].length ; j++)
{
singleBaseProb[i] -= basePairProb[i][j];
}
}
return getPosteriorDecodingConsensusStructure(basePairProb, singleBaseProb);
}
/**
* A single-threaded method for generating the posterior-decoding structure.
* @param basePairProb a NxN matrix of base-pairing probabilities.
* @param singleBaseProb an array of length N representing probabilities for unpaired bases.
* @return an array of paired positions. Where (i, array[i]) represents a nucleotide pairing between nucleotides (i+1, array[i]), if array[i] = 0, then (i+1) is unpaired.
*/
public static int [] getPosteriorDecodingConsensusStructure(double[][] basePairProb, double[] singleBaseProb) {
double[][] eMatrix = new double[basePairProb.length][basePairProb[0].length];
for (int i = 0; i < eMatrix.length; i++) {
for (int j = 0; j < eMatrix.length; j++) {
eMatrix[i][j] = RNAFoldingTools.emptyValue;
}
}
int[] pairedWith = new int[eMatrix.length];
int[][] S = new int[basePairProb.length][basePairProb[0].length];
recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, S, 0, eMatrix.length-1);
//printMatrix(eMatrix);
//System.out.println();
//printMatrix(S);
// writeMatrix(S, new File("e.matrix"));
//writeMatrix(S, new File("s.matrix"));
traceBack(S, 0, eMatrix.length - 1, pairedWith);
return pairedWith;
}
/**
* Performs posterior-decoding using multi-threading.
* @param basePairProb a NxN matrix of base-pairing probabilities.
* @param singleBaseProb an array of length N representing probabilities for unpaired bases.
* @return an instance of MultiThreadedPosteriorDecoding, which has public access to various useful variables (e.g. paired nucleotides of the MEA structure).
*/
public MultiThreadedPosteriorDecoding performPosteriorDecodingMultiThreaded(double[][] basePairProb, double[] singleBaseProb) {
MultiThreadedPosteriorDecoding m = new MultiThreadedPosteriorDecoding(basePairProb, singleBaseProb);
return m;
}
/**
* Performs posterior-decoding using multi-threading.
* @param basePairProb a NxN matrix of base-pairing probabilities. Assumes that sum(row) <= 1, in order to calculate the single base probabilities.
* @return an instance of MultiThreadedPosteriorDecoding, which has public access to various useful variables (e.g. paired nucleotides of the MEA structure).
*/
public MultiThreadedPosteriorDecoding performPosteriorDecodingMultiThreaded(double[][] basePairProb) {
double [] singleBaseProb = new double[basePairProb.length];
for(int i = 0 ; i < basePairProb.length ; i++)
{
singleBaseProb[i] = 1;
for(int j = 0 ; j < basePairProb[0].length ; j++)
{
singleBaseProb[i] -= basePairProb[i][j];
}
}
/*for(int i = 0 ; i < singleBaseProb.length ; i++)
{
System.out.println((i+1)+"\t"+singleBaseProb[i]);
}*/
return new MultiThreadedPosteriorDecoding(basePairProb, singleBaseProb);
}
/**
* Returns the posterior-decoding consensus structure.
* @param basePairProb a NxN matrix of base-pairing probabilities.
* @return the posterior-decoding consensus structure.
*/
public int [] getPosteriorDecodingConsensusStructureMultiThreaded(double[][] basePairProb, double [] singleBaseProb)
{
return performPosteriorDecodingMultiThreaded(basePairProb, singleBaseProb).pairedWith;
}
public int [] getPosteriorDecodingConsensusStructureMultiThreaded(float[][] basePairProb) {
return getPosteriorDecodingConsensusStructureMultiThreaded(getDoubleMatrix(basePairProb));
}
/**
* Returns the posterior-decoding consensus structure.
* @param basePairProb a NxN matrix of base-pairing probabilities. Assumes that sum(row) <= 1, in order to calculate the single base probabilities.
* @return the posterior-decoding consensus structure.
*/
public int [] getPosteriorDecodingConsensusStructureMultiThreaded(double[][] basePairProb)
{
return performPosteriorDecodingMultiThreaded(basePairProb).pairedWith;
}
/**
* A recursive method that fills the dynamic programming matrix for generating the posterior decoding structure.
* @param basePairProb a NxN matrix of base-pairing probabilities.
* @param singleBaseProb a vector of length N representing the probability that a base at a position is unpaired.
* @param eMatrix the dynamic programming matrix for the posterior-decoding.
* @param i the start position of the window.
* @param j the end position of the window.
* @param pairedWith an array of paired positions. Where (i, array[i]) represents a pairing between nucleotides (i+1, array[i]), if array[i] = 0, then (i+1) is unpaired.
* @return the value of the eMatrix at position (i, j).
*/
private static double recursePosteriorDecoding(double[][] basePairProb, double[] singleBaseProb, double[][] eMatrix, int i, int j, int[] pairedWith) {
if (i > j) {
return 0;
}
if (eMatrix[i][j] != RNAFoldingTools.emptyValue) {
return eMatrix[i][j];
}
if (i == j) {
eMatrix[i][j] = singleBaseProb[i];
return eMatrix[i][j];
}
double u1 = recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, i + 1, j, pairedWith) + singleBaseProb[i];
double p1 = recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, i + 1, j - 1, pairedWith) + basePairProb[i][j]; // * 2
double u2 = recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, i, j - 1, pairedWith) + singleBaseProb[j];
double p2 = 0;
for (int k = i; k < j; k++) {
// remember K
p2 = Math.max(p2, recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, i, k, pairedWith) + recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, k + 1, j, pairedWith));
}
eMatrix[i][j] = Math.max(u1, Math.max(p1, Math.max(u2, p2)));
if (p1 > u1 && p1 > u2 && p1 > p2 && pairedWith != null) {
// if(pairedWith[i] == 0 && pairedWith[j] == 0)
//{
pairedWith[i] = j + 1;
pairedWith[j] = i + 1;
//}
System.out.println("B"+i+"\t"+j);
}
return eMatrix[i][j];
}
/**
* A recursive method that fills the dynamic programming matrix for generating the posterior decoding structure.
* @param basePairProb a NxN matrix of base-pairing probabilities.
* @param singleBaseProb a vector of length N representing the probability that a base at a position is unpaired.
* @param eMatrix the dynamic programming matrix for the posterior-decoding.
* @param i the start position of the window.
* @param j the end position of the window.
* @param pairedWith an array of paired positions. Where (i, array[i]) represents a pairing between nucleotides (i+1, array[i]), if array[i] = 0, then (i+1) is unpaired.
* @return the value of the eMatrix at position (i, j).
*/
private static double recursePosteriorDecoding(double[][] basePairProb, double[] singleBaseProb, double[][] eMatrix, int [][] S, int i, int j) {
if (i > j) {
return 0;
}
if (eMatrix[i][j] != RNAFoldingTools.emptyValue) {
return eMatrix[i][j];
}
if (i == j) {
eMatrix[i][j] = singleBaseProb[i];
return eMatrix[i][j];
}
double u1 = recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, S, i + 1, j) + singleBaseProb[i];
double p1 = recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, S, i + 1, j - 1) + 2*basePairProb[i][j]; // * 2
double u2 = recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, S, i, j - 1) + singleBaseProb[j];
double p2 = 0;
int max_k = i;
for (int k = i; k < j; k++) {
double p2k = recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, S, i, k) + recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, S, k + 1, j);
// remember K
if(p2k > p2)
{
p2 = p2k;
max_k = k;
}
}
/*
double max = 0;
if(p1 > u1 && p1 > p2 && p1 > u2)
{
max = p1;
S[i][j] = -3;
}
else
if(u1 > p2 && u1 > u2)
{
max = u1;
S[i][j] = -1;
}
else
if(p2 > u2)
{
max = p2;
S[i][j] = max_k;
}
else
{
max = u2;
S[i][j] = -2;
}*/
double max = 0;
if(u1 > p1 && u1 > p2 && u1 > u2)
{
max = u1;
S[i][j] = -1;
}
else
if(u2 > p1 && u2 > p2)
{
max = u2;
S[i][j] = -2;
}
else
if(p1 > p2)
{
max = p1;
S[i][j] = -3; // paired
}
else
{
max = p2;
S[i][j] = max_k;
}
eMatrix[i][j] = max;
return eMatrix[i][j];
}
public static void traceBack(int [][] S, int i, int j, int [] pairedWith)
{
if(i >= j)
{
// do nothing
}
else
if(S[i][j] == -3)
{
pairedWith[i] = j + 1;
pairedWith[j] = i + 1;
traceBack(S, i+1, j-1, pairedWith);
}
else
{
traceBack(S, i, S[i][j], pairedWith);
traceBack(S, S[i][j]+1, j, pairedWith);
}
}
/**
* Given a String array of dot-bracket structures, fills a vector which
* counts the number of times a specific nucleotide position is unpaired.
* For testing purposes only.
* @param dotBracketStructures
* @return an array of unpaired counts.
*/
public static double[] getSingleBaseCount(String[] dotBracketStructures) {
double[] singleBaseCount = new double[dotBracketStructures[0].length()];
for (int i = 0; i < dotBracketStructures.length; i++) {
String structure = dotBracketStructures[i];
for (int j = 0; j < structure.length(); j++) {
if (structure.charAt(j) == '.') // if unpaired
{
singleBaseCount[j]++;
}
}
}
return singleBaseCount;
}
/**
* Given an String array of dot-bracket structures, fills a matrix which
* counts the number of times a pair of nucleotides is base-paired in each structure.
* For testing purposes only.
* @param dotBracketStructures
* @return a double matrix containing base-pairing counts.
*/
public static double[][] getBasePairCountMatrix(String[] dotBracketStructures) {
double[][] basePairMatrix = new double[dotBracketStructures[0].length()][dotBracketStructures[0].length()];
for (int i = 0; i < dotBracketStructures.length; i++) {
Stack<Integer> stack = new Stack<Integer>();
String structure = dotBracketStructures[i];
for (int j = 0; j < structure.length(); j++) {
if (structure.charAt(j) == '(') {
stack.push(j);
} else if (structure.charAt(j) == ')') {
int x = stack.pop();
int y = j;
basePairMatrix[x][y]++;
basePairMatrix[y][x]++;
}
}
}
return basePairMatrix;
}
/**
* A helper method which prints double matrices.
* For testing purposes only.
* @param matrix
*/
public static void printMatrix(double[][] matrix) {
for (int i = 0; i < matrix.length; i++) {
for (int j = 0; j < matrix[0].length; j++) {
System.out.print(pad(matrix[i][j] + "", 4) + " ");
}
System.out.println();
}
}
/**
* A helper method which writes double matrices to files.
* For testing purposes only.
* @param matrix
*/
public static void writeMatrix(double[][] matrix, File file) {
DecimalFormat df = new DecimalFormat("0.0000E0");
try
{
BufferedWriter buffer = new BufferedWriter(new FileWriter(file));
for (int i = 0; i < matrix.length; i++) {
for (int j = 0; j < matrix[0].length; j++) {
buffer.write(pad(df.format(matrix[i][j]) + "", 10) + " ");
}
buffer.newLine();
}
buffer.close();
}
catch(IOException ex)
{
ex.printStackTrace();
}
}
/**
* A helper method which writes integer matrices to files.
* For testing purposes only.
* @param matrix
*/
public static void writeMatrix(int[][] matrix, File file) {
try
{
BufferedWriter buffer = new BufferedWriter(new FileWriter(file));
for (int i = 0; i < matrix.length; i++) {
for (int j = 0; j < matrix[0].length; j++) {
buffer.write(pad(matrix[i][j] + "", 4) + " ");
}
buffer.newLine();
}
buffer.close();
}
catch(IOException ex)
{
ex.printStackTrace();
}
}
/**
* A helper method which pads or truncates a string to a specific length.
* For testing purposes only.
* @param s
* @param length
* @return the padded/truncated string.
*/
public static String pad(String s, int length) {
String ret = s;
for (int i = s.length(); i < length; i++) {
ret += " ";
}
return ret.substring(0, length);
}
public RNAFoldingTools() {
//test();
}
public void test() {
int repeats = 100;
int size = 25;
while (true) {
System.out.println("-------------------------------------------------------------");
size += 25;
String s = "((";
for (int i = 0; i < size; i++) {
s += ".";
}
s += "))";
String[] structures = {s};
double[][] basePairCount = getBasePairCountMatrix(structures);
double[] singleBaseCount = getSingleBaseCount(structures);
//getPosteriorDecodingConsensusStructure(basePairCount, singleBaseCount);
long startTime1 = System.currentTimeMillis();
int divisionSize = 0;
for (int i = 0; i < repeats; i++) {
//getPosteriorDecodingConsensusStructure(basePairCount, singleBaseCount);
MultiThreadedPosteriorDecoding m = new MultiThreadedPosteriorDecoding(basePairCount, singleBaseCount);
m.compute();
divisionSize = m.divisionSize;
}
long endTime1 = System.currentTimeMillis();
long elapsed1 = endTime1 - startTime1;
long startTime2 = System.currentTimeMillis();
for (int i = 0; i < repeats; i++) {
getPosteriorDecodingConsensusStructure(basePairCount, singleBaseCount);
}
long endTime2 = System.currentTimeMillis();
long elapsed2 = endTime2 - startTime2;
double rate1 = (double) repeats / (double) elapsed1;
double rate2 = (double) repeats / (double) elapsed2;
double ratio = rate1 / rate2;
System.out.println((size + 4) + "\t" + elapsed1 + "\t" + elapsed2 + "\t" + divisionSize + "\t" + rate1 + "\t" + rate2 + "\t" + ratio);
if (elapsed2 > 5000) {
repeats = Math.max(1, repeats / 2);
}
}
}
/**
* A class, which given a base-pairing probability matrix and an array
* representing the probabilities of a single nucleotides being unpaired,
* performs a multi-threaded posterior-decoding and returns the MPD consensus structure.
*/
public class MultiThreadedPosteriorDecoding {
double[][] basePairProb;
double[] singleBaseProb;
int[] pairedWith;
final Integer lock = new Integer(0);
final Integer waitLock = new Integer(0);
int maxThreads = Runtime.getRuntime().availableProcessors();
int threadsUsed = 0;
int currentBlockY = 0;
int currentX = 0;
int currentY = 0;
int length;
int startingDivisionSize;
int divisionSize;
int blockIncrement;
double[][] eMatrix;
int[][] S;
public MultiThreadedPosteriorDecoding(double[][] basePairCount, double[] singleBaseCount) {
this.length = singleBaseCount.length;
this.pairedWith = new int[this.length];
this.basePairProb = basePairCount;
this.singleBaseProb = singleBaseCount;
//this.divisionSize = Math.max(length / 75, Math.min(length / maxThreads, maxThreads * maxThreads));
this.divisionSize = Math.max(this.length / this.maxThreads, 10);
this.blockIncrement = this.divisionSize;
// this.divisionSize = Math.max(this.length / this.maxThreads / 5,40);
this.startingDivisionSize = this.divisionSize;
//System.out.println(divisionSize);
this.eMatrix = new double[length][length];
for (int i = 0; i < eMatrix.length; i++) {
for (int j = 0; j < eMatrix.length; j++) {
eMatrix[i][j] = RNAFoldingTools.emptyValue;
}
}
this.S = new int[length][length];
compute();
}
/**
* Initializes the posterior-decoding procedure by launching a specified
* number of threads.
* @see MultiThreadedPosteriorDecoding#computeNextSection()
* @return the value of the dynamic programming matrix at (0, N).
*/
public double compute() {
try {
synchronized (waitLock) {
for (int i = 0; i < this.maxThreads; i++) {
computeNextSection();
}
waitLock.wait();
}
} catch (InterruptedException ex) {
Logger.getLogger(RNAFoldingTools.class.getName()).log(Level.SEVERE, null, ex);
}
// recurse one last time *just* in case missed the 0,N case,
RNAFoldingTools.recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, 0, eMatrix.length - 1, pairedWith);
RNAFoldingTools.traceBack(S, 0, eMatrix.length-1, pairedWith);
return eMatrix[0][eMatrix.length-1];
}
/**
* Launches new threads until no more sections are available.
* The thread class itself recursively calls computeNextSection() each time it completes,
* ensuring that the number of threads running at any one time is less than number of
* available processing cores.
*/
public void computeNextSection() {
synchronized (lock) {
Pair p = getNextSection();
if (p.x != -1) {
new PosteriorDecodingThread(p.x, p.y).start();
}
}
}
/**
* Generates a sequence of coordinates of windows on which posterior-decoding can
* be performed independently in parallel.
* @return the positions of the next window to perform posterior-decoding.
*/
public Pair getNextSection() {
Pair p = getNextSectionRecursive();
if (p.x == -1) {
return p;
} else {
return new Pair(Math.min(p.x, this.length - 1), Math.min(p.y, this.length - 1));
}
}
/**
* @see MultiThreadedPosteriorDecoding#getNextSection()
*/
private Pair getNextSectionRecursive() {
int newX = this.currentX;
int newY = this.currentY;
synchronized (lock) {
if (newX == -1 && newY == -1) {
return new Pair(-1, -1);
} else if (newX == 0 && newY == 0) {
newX = 0;
newY = divisionSize;
} else {
newX += divisionSize;
newY += divisionSize;
if (currentBlockY >= this.length) {
newX = -1;
newY = -1;
} else if (newX >= this.length) {
currentBlockY += this.blockIncrement;
//int j =
//
//divisionSize = (int)Math.max(Math.sqrt(2*Math.pow(length - currentBlockY,2))/ (double)maxThreads, 10);
divisionSize = (int) Math.max((double) (length - currentBlockY) / (double) maxThreads, 10);
//System.out.println(currentBlockY+"\t"+divisionSize);
//System.out.println(length+"\t"+currentBlockY+"\t"+divisionSize);
// start back at the first row of the matrix
//System.out.println(length+"\t"+startingDivisionSize);
//this.divisionSize = (int)Math.max((double) this.divisionSize - ((double)this.divisionSize / ((double)this.length / (double)this.divisionSize)), 1);
//System.out.println(this.divisionSize);
newX = 0;
newY = currentBlockY + divisionSize;
}
}
}
this.currentX = newX;
//this.currentX = this.currentY;
/*
* if(newX != 0) { this.currentX = this.currentY;
}
*/
this.currentY = newY;
if (newY - divisionSize > length) {
return getNextSection();
}
return new Pair(newX, newY);
}
/**
* A single thread which performs posterior-decoding on a specified window.
*/
class PosteriorDecodingThread extends Thread {
int x;
int y;
public PosteriorDecodingThread(int x, int y) {
this.x = x;
this.y = y;
}
public void run() {
RNAFoldingTools.recursePosteriorDecoding(basePairProb, singleBaseProb, eMatrix, S, this.x, this.y);
computeNextSection();
if (this.x == 0 && this.y == MultiThreadedPosteriorDecoding.this.length - 1) {
// complete, so notify
synchronized (waitLock) {
waitLock.notify();
}
}
}
}
}
/**
* Returns a dot bracket string representation given an array paired sites.
* @param pairedSites
* @return a dot bracket string representation given an array paired sites.
*/
public static String getDotBracketStringFromPairedSites(int [] pairedSites)
{
String dbs = "";
for(int i = 0 ; i < pairedSites.length ; i++)
{
if(pairedSites[i] == 0)
{
dbs += ".";
}
else
if(pairedSites[i] > i+1)
{
dbs += "(";
}
else
{
dbs += ")";
}
}
return dbs;
}
/**
* Given a dot bracket string representation, returns an array of paired sites.
* @param dotBracketStructure a dot bracket string representation of a structure.
* @return an array of paired sites.
* @see #getDotBracketStringFromPairedSites(int[])
*/
public static int [] getPairedSitesFromDotBracketString(String dotBracketStructure)
{
int [] pairedSites = new int[dotBracketStructure.length()];
Stack<Integer> stack = new Stack<Integer>();
for (int j = 0; j < dotBracketStructure.length(); j++) {
if (dotBracketStructure.charAt(j) == '(') {
stack.push(j);
} else if (dotBracketStructure.charAt(j) == ')') {
int x = stack.pop();
int y = j;
pairedSites[x] = y + 1;
pairedSites[y] = x + 1;
}
}
return pairedSites;
}
/**
* Given a dot bracket string representation, returns an array of paired sites.
* @param dotBracketStructure a dot bracket string representation of a structure.
* @param openBracket the opening bracket.
* @param closeBracket the closing bracket.
* @return an array of paired sites.
* @see #getDotBracketStringFromPairedSites(int[])
*/
public static int [] getPairedSitesFromDotBracketString(String dotBracketStructure, char openBracket, char closeBracket)
{
int [] pairedSites = new int[dotBracketStructure.length()];
Stack<Integer> stack = new Stack<Integer>();
for (int j = 0; j < dotBracketStructure.length(); j++) {
if (dotBracketStructure.charAt(j) == openBracket) {
stack.push(j);
} else if (dotBracketStructure.charAt(j) == closeBracket) {
if(!stack.empty())
{
int x = stack.pop();
int y = j;
pairedSites[x] = y + 1;
pairedSites[y] = x + 1;
}
}
}
return pairedSites;
}
public static String getReferenceSequence(ArrayList<String> sequences, int refLength)
{
int length = 0;
for(int i = 0 ; i < sequences.size() ; i++)
{
length = Math.max(length, sequences.get(i).length());
}
int [] counts = new int[length];
int maxCount = 0;
boolean [] countUsed = new boolean[length];
for(int i = 0 ; i < sequences.size() ; i++)
{
String seq = sequences.get(i);
for(int j = 0 ; j < seq.length() ; j++)
{
if(seq.charAt(j) != '-')
{
counts[j] += 1;
maxCount = Math.max(maxCount, counts[j]);
}
}
}
String referenceString = "";
int n = 0;
for(int k = maxCount ; k >= 0 ; k--)
{
for(int i = 0 ; i < counts.length ; i++)
{
if(!countUsed[i] && counts[i] == k)
{
countUsed[i] = true;
n++;
}
if(n == refLength)
{
break;
}
}
if(n == refLength)
{
break;
}
}
String ref = "";
for(int i = 0 ; i < countUsed.length ; i++)
{
if(countUsed[i])
{
ref += "N";
}
else
{
ref += "-";
}
}
while(ref.length() < refLength)
{
ref += "X";
}
return ref;
}
public static String getDotBracketStringFromCtFile(File ctFile)
{
return RNAFoldingTools.getDotBracketStringFromPairedSites(RNAFoldingTools.getPairedSitesFromCtFile(ctFile));
}
public static String getSequenceByName(String seqName, List<String> sequences, List<String> sequenceNames)
{
for(int i = 0 ; i < sequences.size() ; i++)
{
if(sequenceNames.get(i).equals(seqName))
{
return sequences.get(i);
}
}
System.err.println("Could not find ref seq: "+seqName);
return sequences.get(0);
}
public static int [] getPairedSitesFromCtFile(File ctFile)
{
try
{
BufferedReader buffer = new BufferedReader(new FileReader(ctFile));
String textline = null;
int [] pairedSites = null;
while ((textline = buffer.readLine()) != null) {
String[] split = textline.trim().split("(\\s)+");
int length = Integer.parseInt(split[0]);
pairedSites = new int[length];
//String sequence = "";
for (int i = 0; i < length && (textline = buffer.readLine()) != null; i++) {
String[] split2 = textline.trim().split("(\\s)+");
pairedSites[Integer.parseInt(split2[0])-1] = Integer.parseInt(split2[4]);
}
}
buffer.close();
return pairedSites;
}
catch(IOException ex)
{
ex.printStackTrace();
}
return null;
}
public static int [] getPairedSitesFromDBNStringFile(File dbnFile)
{
try
{
BufferedReader buffer = new BufferedReader(new FileReader(dbnFile));
String textline = buffer.readLine();
buffer.close();
System.out.println(textline);
return getPairedSitesFromDotBracketString(textline);
}
catch(IOException ex)
{
ex.printStackTrace();
}
return null;
}
public static double [][] loadMatrix(File bpFile)
{
//File bpFile = new File("C:/Users/Michael/Dropbox/RNA and StatAlign/TestRNAData/TestRNAData1.dat.bp");
double [][] bpMatrix = null;
try
{
BufferedReader buffer = new BufferedReader(new FileReader(bpFile));
String textline = buffer.readLine();
int length = textline.split("(\\s)+").length;
bpMatrix = new double[length][length];
for(int i = 0 ; i < length ; i++)
{
String [] split = textline.split("(\\s)+");
for(int j = 0 ; j < length ; j++)
{
bpMatrix[i][j] = Double.parseDouble(split[j]);
}
textline = buffer.readLine();
}
buffer.close();
}
catch(IOException ex)
{
ex.printStackTrace();
}
return bpMatrix;
}
public static double [][] getDoubleMatrix(float [][] matrix)
{
if(matrix == null)
{
return null;
}
double [][] doubleMatrix = new double[matrix.length][matrix[0].length];
for(int i = 0 ; i < matrix.length ; i++)
{
for(int j = 0 ; j < matrix[0].length ; j++)
{
doubleMatrix[i][j] = matrix[i][j];
}
}
return doubleMatrix;
}
public static float [][] getFloatMatrix(double [][] matrix)
{
float [][] floatMatrix = new float[matrix.length][matrix[0].length];
for(int i = 0 ; i < matrix.length ; i++)
{
for(int j = 0 ; j < matrix[0].length ; j++)
{
floatMatrix[i][j] = (float)matrix[i][j];
}
}
return floatMatrix;
}
public static void loadFastaSequences(File file, ArrayList<String> sequences, ArrayList<String> sequenceNames) {
loadFastaSequences(file, sequences, sequenceNames, Integer.MAX_VALUE);
}
public static void loadFastaSequences(File file, ArrayList<String> sequences, ArrayList<String> sequenceNames, int max) {
try {
BufferedReader buffer = new BufferedReader(new FileReader(file));
String textline = null;
String sequence = "";
int n = 0;
boolean maxReached = false;
while ((textline = buffer.readLine()) != null) {
if(maxReached && textline.startsWith(">"))
{
break;
}
if (textline.startsWith(">")) {
n++;
if (n >= max) {
maxReached = true;
}
sequenceNames.add(textline.substring(1));
if (!sequence.equals("")) {
sequences.add(sequence.toUpperCase());
sequence = "";
}
} else {
sequence += textline.trim();
}
}
buffer.close();
if (!sequence.equals("")) {
sequences.add(sequence);
}
} catch (IOException ex) {
ex.printStackTrace();
}
}
public static double calculatePPfoldReliabilityScore(int [] pairedSites, double [][] basePairProb)
{
double [] singleBaseProb = RNAFoldingTools.getSingleBaseProb(basePairProb);
double ppfoldReliablityScore = 0;
for(int i = 0 ; i < pairedSites.length ; i++)
{
if(pairedSites[i] == 0)
{
ppfoldReliablityScore += singleBaseProb[i];
}
else
{
ppfoldReliablityScore += basePairProb[i][pairedSites[i]-1];
}
}
return ppfoldReliablityScore/((double)pairedSites.length);
}
public static double calculatePairsOnlyReliabilityScore(int [] pairedSites, double [][] basePairProb)
{
double ppfoldReliablityScore = 0;
double pairs = 0;
for(int i = 0 ; i < pairedSites.length ; i++)
{
if(pairedSites[i] != 0)
{
ppfoldReliablityScore += basePairProb[i][pairedSites[i]-1];
pairs++;
}
}
if(pairs == 0)
{
return 1;
}
return ppfoldReliablityScore/pairs;
}
public static double calculatePairsOnlyReliabilityScore(int [] pairedSites, double [][] basePairProb, ArrayList<Double> weights)
{
double ppfoldReliablityScore = 0;
double pairs = 0;
double weightSum = 0;
for(int i = 0 ; i < pairedSites.length ; i++)
{
if(pairedSites[i] != 0)
{
double weighting = weights.get(i)*weights.get(pairedSites[i]-1);
ppfoldReliablityScore += basePairProb[i][pairedSites[i]-1]*weighting;
weightSum += weighting;
pairs++;
}
}
if(pairs == 0)
{
return 1;
}
return ppfoldReliablityScore/pairs;
}
public static void writeToFile(File f, String s, boolean append)
{
try
{
BufferedWriter buffer = new BufferedWriter(new FileWriter(f, append));
buffer.write(s+"\n");
buffer.close();
}
catch(IOException ex)
{
ex.printStackTrace();
}
}
public static void saveCtFile(File outFile, int [] pairedSites, String header, String sequence)
{
try
{
BufferedWriter buffer = new BufferedWriter(new FileWriter(outFile));
buffer.write(pairedSites.length +"\t"+header+"\n");
for(int i = 0 ; i < pairedSites.length ; i++)
{
buffer.write((i+1)+"\t"+sequence.charAt(i)+"\t"+i+"\t"+(i+2)+"\t"+pairedSites[i]+"\t"+(i+1)+"\n");
}
buffer.close();
}
catch(IOException ex)
{
ex.printStackTrace();
}
}
public static void saveDotBracketFile(File outFile, int [] pairedSites, String header, String sequence)
{
try
{
BufferedWriter buffer = new BufferedWriter(new FileWriter(outFile));
buffer.write(">"+header+"\n");
buffer.write(sequence+"\n");
buffer.write(RNAFoldingTools.getDotBracketStringFromPairedSites(pairedSites)+"\n");
buffer.close();
}
catch(IOException ex)
{
ex.printStackTrace();
}
}
public static boolean isRNAalignment(ArrayList<String> sequences)
{
double countRNA = 0;
double countNonRNA = 0;
for(int i = 0 ; i < sequences.size() ; i++)
{
String s = sequences.get(i).toUpperCase();
for(int j = 0 ; j < s.length() ; j++)
{
switch(s.charAt(j))
{
case 'A':
countRNA++;
break;
case 'C':
countRNA++;
break;
case 'G':
countRNA++;
break;
case 'T':
countRNA++;
break;
case 'U':
countRNA++;
break;
case '-':
break;
default:
countNonRNA++;
}
}
}
System.out.println(sequences);
double ratio = countRNA / (countRNA + countNonRNA);
System.out.println(ratio);
return ratio > 0.4;
}
}