/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broad.igv.feature;
import org.broad.igv.feature.basepair.BasePairFeature;
import org.broad.igv.Globals;
import java.awt.Color;
import java.awt.Point;
import java.io.*;
import java.util.*;
// multiple return from loadDotBracket() and loadConnectTable()
class SeqLenAndPairs {
public int seqLen;
public ArrayList<Point> pairs;
public SeqLenAndPairs(int seqLen, ArrayList<Point> pairs) {
this.seqLen = seqLen;
this.pairs = pairs;
}
}
// multiple return from loadPairingProb()
class SeqLenAndBinnedPairs {
public int seqLen;
public ArrayList<ArrayList<Point>> binnedPairs;
public SeqLenAndBinnedPairs(int seqLen, ArrayList<ArrayList<Point>> binnedPairs) {
this.seqLen = seqLen;
this.binnedPairs = binnedPairs;
}
}
/**
* @author sbusan
*/
public class BasePairFileUtils {
// TODO: support bpseq, stockholm, other formats?
// TODO: warning dialog on file overwrite
/**
* Convert RNA-based transcript coordinates to stranded chromosome coords for
* base-pairing arcs.
*
* @param arcs
* @param seqLen
* @param newLeft 1-based genomic coordinate for left-most
* position for input pairs sequence after transformation.
* If strand is "-", this will end up being the 3-prime end
* of the transcript (but the left end in genomic coords).
* @param strand "+" or "-"
* @return
*/
static LinkedList<BasePairFeature> transformArcs(LinkedList<BasePairFeature> arcs,
int seqLen,
int newLeft,
String strand){
LinkedList<BasePairFeature> transArcs = new LinkedList<>();
for (BasePairFeature arc : arcs){
String chr = arc.getChr();
Color color = arc.getColor();
int startLeft, startRight, endLeft, endRight;
if (strand == "+"){
startLeft = arc.getStartLeft() + newLeft - 1;
startRight = arc.getStartRight() + newLeft - 1;
endLeft = arc.getEndLeft() + newLeft - 1;
endRight = arc.getEndRight() + newLeft - 1;
} else if (strand == "-"){
startLeft = seqLen - arc.getEndRight() + newLeft;
startRight = seqLen - arc.getEndLeft() + newLeft;
endLeft = seqLen - arc.getStartRight() + newLeft;
endRight = seqLen - arc.getStartLeft() + newLeft;
} else {
throw new RuntimeException("Unrecognized strand (options: \"+\",\"-\")");
}
BasePairFeature transArc = new BasePairFeature(chr,
startLeft,
startRight,
endLeft,
endRight,
color);
transArcs.add(transArc);
}
return transArcs;
}
static SeqLenAndPairs loadDotBracket(String inFile) throws
FileNotFoundException, IOException {
// TODO: add error messages for misformatted file
ArrayList<Point> pairs = new ArrayList<>();
int seqLen = 0;
BufferedReader br = null;
try {
br = new BufferedReader(new FileReader(inFile));
String struct = "";
String nextLine;
while ((nextLine = br.readLine()) != null) {
// skip leading header lines if present
if (nextLine.startsWith(">") || nextLine.startsWith("#")) {
continue;
}
String s = nextLine.trim();
if (s.chars().allMatch(Character::isLetter)) {
// sequence line
continue;
} else {
// assumed structure line
struct += s;
}
}
String leftBrackets = "([{<";
String rightBrackets = ")]}>";
ArrayList<LinkedList<Integer>> openIndices = new ArrayList<>();
for (int i = 0; i < leftBrackets.length(); ++i) {
openIndices.add(new LinkedList<>());
}
for (int i = 0; i < struct.length(); ++i) {
int n = leftBrackets.indexOf(struct.charAt(i));
int k = rightBrackets.indexOf(struct.charAt(i));
if (n >= 0) {
openIndices.get(n).add(i);
} else if (k >= 0) {
int left = i+1;
int right = openIndices.get(k).pollLast()+1;
pairs.add(new Point(left, right));
}
}
seqLen = struct.length();
} finally {
if (br != null) br.close();
}
return new SeqLenAndPairs(seqLen, pairs);
}
static SeqLenAndPairs loadConnectTable(String inFile) throws
FileNotFoundException, IOException {
// TODO: add error messages for misformatted file
ArrayList<Point> pairs = new ArrayList<>();
int seqLen = 0;
BufferedReader br = null;
try {
br = new BufferedReader(new FileReader(inFile));
seqLen = Integer.parseInt(Globals.whitespacePattern.split(br.readLine().trim())[0]);
String nextLine;
int n = 1;
while ((nextLine = br.readLine()) != null && n <= seqLen) {
String[] s = Globals.whitespacePattern.split(nextLine.trim());
int left = Integer.parseInt(s[0]);
int right = Integer.parseInt(s[4]);
if (right > left) pairs.add(new Point(left, right));
n++;
}
} finally {
if (br != null) br.close();
}
return new SeqLenAndPairs(seqLen, pairs);
}
static SeqLenAndBinnedPairs loadPairingProb(String inFile) throws
FileNotFoundException, IOException {
// TODO: add probability threshold color legend to track dropdown menu
// TODO: support alternate thresholds, interactive threshold update from track UI?
ArrayList<ArrayList<Point>> binnedPairs = new ArrayList<>();
int seqLen = 0;
double[] probThresh = {0.1, 0.3, 0.8};
double[] negLogTenProbThresh = {0,0,0};
for (int i=0; i<probThresh.length; i++) {
negLogTenProbThresh[i] = -Math.log10(probThresh[i]);
}
for (int i=0; i<probThresh.length; i++) binnedPairs.add(new ArrayList<>());
BufferedReader br = null;
try {
br = new BufferedReader(new FileReader(inFile));
seqLen = Integer.parseInt(Globals.whitespacePattern.split(br.readLine().trim())[0]);
br.readLine();
String nextLine;
while ((nextLine = br.readLine()) != null) {
String[] s = Globals.whitespacePattern.split(nextLine.trim());
int left = Integer.parseInt(s[0]);
int right = Integer.parseInt(s[1]);
double negLogTenProb = Double.parseDouble(s[2]);
int binIndex = -1;
for (int i=probThresh.length-1; i>=0; i--){
if (negLogTenProb <= negLogTenProbThresh[i]) {
binIndex = i;
break;
}
}
if (binIndex != -1) binnedPairs.get(binIndex).add(new Point(left, right));
}
} finally {
if (br != null) br.close();
}
return new SeqLenAndBinnedPairs(seqLen, binnedPairs);
}
static void writeBasePairFile(String bpFile,
ArrayList<Color> colors,
ArrayList<LinkedList<BasePairFeature>> groupedArcs) throws IOException {
PrintWriter pw = null;
try {
pw = new PrintWriter(new BufferedWriter(new FileWriter(bpFile)));
// first write enumerated colors header
for (Color color : colors) {
pw.println("color:\t" + color.getRed() + "\t" + color.getGreen() + "\t" + color.getBlue());
}
// then write arc coordinates
int colorIndex = 0;
for (LinkedList<BasePairFeature> colorGroup : groupedArcs) {
for (BasePairFeature arc : colorGroup) {
pw.println(arc.toStringNoColor() + "\t" + colorIndex);
}
colorIndex++;
}
} finally {
if (pw != null) pw.close();
}
}
/**
* Merge adjacent basepairs into helices. This makes assumptions about input pair list order.
*/
static LinkedList<BasePairFeature> pairsToHelices(ArrayList<Point> pairs,
String chromosome) {
ArrayList<Point> bps = new ArrayList<>(pairs);
LinkedList<LinkedList<Point>> helixPairGroups = new LinkedList<>();
// FIXME: there should be a much faster and more elegant way to do this
while (bps.size() > 0) {
Point bp = bps.get(0);
LinkedList<Point> helixPairs = new LinkedList<>();
boolean[] removePairs = new boolean[bps.size()];
helixPairs.add(bp);
removePairs[0] = true;
boolean endOfList = false;
/* - ignore other pairs starting with this left nuc
* - if any base pairs exist starting one nuc downstream of current left nuc
* and ending one nuc upstream of current right nuc, store index to remove
* and append to growing helix
*/
int i = 1;
int skippedCount = 0;
if (i < bps.size()) {
while (bps.get(i).x == bp.x) {
i++;
skippedCount++;
if (i >= bps.size()) {
endOfList = true;
break;
}
}
} else {
endOfList = true;
}
while (i < bps.size()) {
if (bps.get(i).x - bp.x > 1) {
// reached the end of the helix
helixPairGroups.add(helixPairs);
// remove helix pairs
ArrayList<Point> tmpBps = new ArrayList<>();
for (int k = 0; k < bps.size(); k++) {
if (!removePairs[k]) tmpBps.add(bps.get(k));
}
bps = tmpBps;
break;
} else if (bps.get(i).y - bp.y == -1) {
bp = bps.get(i);
helixPairs.add(bp);
removePairs[i] = true;
}
i++;
if (i >= bps.size()) {
endOfList = true;
break;
}
}
if (endOfList) {
helixPairGroups.add(helixPairs);
// remove helix pairs
ArrayList<Point> tmpBps = new ArrayList<>();
for (int k = 0; k < bps.size(); k++) {
if (!removePairs[k]) tmpBps.add(bps.get(k));
}
bps = tmpBps;
}
}
// convert lists of adjacent pairs to BasePairFeatures
LinkedList<BasePairFeature> helices = new LinkedList<>();
for (LinkedList<Point> helixPairs : helixPairGroups) {
int startLeft = Integer.MAX_VALUE;
int startRight = 0;
int endLeft = Integer.MAX_VALUE;
int endRight = 0;
for (Point pair : helixPairs) {
if (pair.x < startLeft) startLeft = pair.x;
if (pair.x > startRight) startRight = pair.x;
if (pair.y < endLeft) endLeft = pair.y;
if (pair.y > endRight) endRight = pair.y;
}
helices.add(new BasePairFeature(chromosome,
startLeft,
startRight,
endLeft,
endRight,
null));
}
return helices;
}
/**
* Convert a base pairing structure file in dot-bracket notation
* (also known as Vienna format) to an easily parseable .bp arcs file. Does not
* currently handle mapping coords to spliced transcripts.
*
*/
public static void dotBracketToBasePairFile(String inFile,
String bpFile,
String chromosome,
String strand,
int left) throws
FileNotFoundException, IOException {
SeqLenAndPairs s = loadDotBracket(inFile);
ArrayList<Point> pairs = s.pairs;
int seqLen = s.seqLen;
LinkedList<BasePairFeature> arcs = pairsToHelices(pairs, chromosome);
arcs = transformArcs(arcs, seqLen, left, strand);
ArrayList<Color> colors = new ArrayList<>();
colors.add(Color.black);
ArrayList<LinkedList<BasePairFeature>> groupedArcs = new ArrayList<>();
groupedArcs.add(arcs); // list of length 1 for this case (arcs only have 1 color)
writeBasePairFile(bpFile, colors, groupedArcs);
}
/**
* Convert a connectivity table file as output by RNAStructure
* to an easily parseable .bp arcs file. Does not
* currently handle mapping coords to spliced transcripts.
*/
public static void connectTableToBasePairFile(String inFile,
String bpFile,
String chromosome,
String strand,
int left) throws
FileNotFoundException, IOException {
SeqLenAndPairs s = loadConnectTable(inFile);
ArrayList<Point> pairs = s.pairs;
int seqLen = s.seqLen;
LinkedList<BasePairFeature> arcs = pairsToHelices(pairs, chromosome);
arcs = transformArcs(arcs, seqLen, left, strand);
ArrayList<Color> colors = new ArrayList<>();
colors.add(Color.black);
ArrayList<LinkedList<BasePairFeature>> groupedArcs = new ArrayList<>();
groupedArcs.add(arcs); // list of length 1 for this case (arcs only have 1 color)
writeBasePairFile(bpFile, colors, groupedArcs);
}
/**
* Convert a pairing probability file as output by RNAStructure
* and/or SuperFold to an easily parseable .bp arcs file. Does not
* currently handle mapping coords to spliced transcripts.
*/
public static void pairingProbToBasePairFile(String inFile,
String bpFile,
String chromosome,
String strand,
int left) throws
FileNotFoundException, IOException {
SeqLenAndBinnedPairs s = loadPairingProb(inFile);
ArrayList<ArrayList<Point>> binnedPairs = s.binnedPairs;
int seqLen = s.seqLen;
ArrayList<LinkedList<BasePairFeature>> groupedArcs = new ArrayList<>();
for (ArrayList<Point> pairGroup : binnedPairs){
groupedArcs.add(transformArcs(pairsToHelices(pairGroup, chromosome),
seqLen, left, strand));
}
ArrayList<Color> colors = new ArrayList<>();
colors.add(new Color(255, 204, 0));
colors.add(new Color(72, 143, 205));
colors.add(new Color(81, 184, 72));
writeBasePairFile(bpFile, colors, groupedArcs);
}
}