import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
import java.util.Stack;
public class h2 {
public static final int GAP_PENALTY = -4;
public static final int NUM_PERMUTED_MATCHES = 10000;
public final static Random rand = new Random();
public static void main(String[] args) throws Exception {
BLOSUM62File.parseBLOSUM62File();
//BLOSUM62File.printBLOSUM62File();
final String s = "deadly";
final String t = "ddgearlyk";
// final String s = "MELLSLCSWFAAATTYDADFYDDP";
// final String t = "MSNWTATSSDSTS";
// put custom strings into protein format so that I can use the protein pair class
final String ps = String.format(">sp|%s|Blah blah\n%s", s, s);
final String pt = String.format(">sp|%s|Blah blah\n%s", t, t);
ProteinPair simple = new ProteinPair(new FastA(new StringBuilder(ps)),
new FastA(new StringBuilder(pt)), true);
simple.calculatePValue();
List<FastA> proteins = readProteins("../data/proteins.fasta");
List<ProteinPair> proteinPairs = new ArrayList<ProteinPair>();
// print simple string results
simple.matchingResult.printOut(s, t);
System.out.println(simple.matchScore + " " + simple.pValue);
simple.matchingResult.printMatrix(s, t);
for (int i = 0; i < proteins.size(); i++) {
for (int j = i + 1; j < proteins.size(); j++) {
ProteinPair p = new ProteinPair(proteins.get(i),
proteins.get(j), true);
if (p.p1.AccessionCode.equals("P15172") && p.p2.AccessionCode.endsWith("Q10574")) {
p.calculatePValue();
}
if (p.p1.AccessionCode.equals("P15172") && p.p2.AccessionCode.endsWith("O95363")) {
p.calculatePValue();
}
proteinPairs.add(p);
}
}
// print out results for all protein pairs generated
for (ProteinPair p : proteinPairs) {
System.out.println(String.format("%s, %s, %d", p.p1.AccessionCode,
p.p2.AccessionCode, p.matchScore));
p.matchingResult.printOut(p.p1.AccessionCode, p.p2.AccessionCode);
}
s.length();
}
public static Result smithWaterman(StringBuilder s, StringBuilder t) throws Exception {
int[][] M = new int[s.length() + 1][t.length() + 1];
int maxResult = 0;
int max_i = 0;
int max_j = 0;
for (int i = 1; i < M.length; i++) {
for (int j = 1; j < M[i].length; j++) {
int[] optionsToChoose = new int[4];
optionsToChoose[0] = 0;
optionsToChoose[1] = M[i - 1][j - 1]
+ substitionMatrix(s.charAt(i-1), t.charAt(j-1));
optionsToChoose[2] = M[i - 1][j] + GAP_PENALTY;
optionsToChoose[3] = M[i][j - 1] + GAP_PENALTY;
int optionChosen = 0;
for (int k = 1; k < optionsToChoose.length; k++) {
if (optionsToChoose[k] > optionsToChoose[optionChosen]) {
optionChosen = k;
}
}
M[i][j] = optionsToChoose[optionChosen];
if (M[i][j] > maxResult) {
maxResult = M[i][j];
max_i = i;
max_j = j;
}
}
}
return new Result(M, M.length, M[0].length, max_i, max_j, maxResult);
}
public static void traceBack(Result r, String s, String t, int i, int j)
throws Exception {
int[][] M = r.Matrix;
if (M[i][j] == 0) {
return;
}
int[] optionsToChoose = new int[4];
int substMatrixResult = substitionMatrix(s.charAt(i-1), t.charAt(j-1));
optionsToChoose[0] = 0;
optionsToChoose[1] = M[i - 1][j - 1]
+ substMatrixResult;
optionsToChoose[2] = M[i - 1][j] + GAP_PENALTY;
optionsToChoose[3] = M[i][j - 1] + GAP_PENALTY;
int optionChosen = 0;
for (int k = 1; k < optionsToChoose.length; k++) {
if (optionsToChoose[k] > optionsToChoose[optionChosen]) {
optionChosen = k;
}
}
M[i][j] = optionsToChoose[optionChosen];
switch (optionChosen) {
case 0:
return;
case 1:
// using a stack to store characters that are part of the match and delta
r.pushX(s.charAt(i-1));
r.x_i.push(i-1);
r.pushY(t.charAt(j-1));
r.y_j.push(j-1);
if (s.charAt(i-1) == t.charAt(j-1)) {
r.pushDiff(s.charAt(i-1));
} else {
if(substMatrixResult > 0) {
r.pushDiff('+');
} else {
r.pushDiff(' ');
}
}
traceBack(r, s, t, i-1, j-1);
break;
case 2:
r.pushX(s.charAt(i-1));
r.x_i.push(i-1);
r.pushY('-');
r.y_j.push(j-1);
r.pushDiff(' ');
traceBack(r, s, t, i-1, j);
break;
case 3:
r.pushX('-');
r.x_i.push(i-1);
r.pushY(t.charAt(j-1));
r.y_j.push(j-1);
r.pushDiff(' ');
traceBack(r, s, t, i, j-1);
break;
default:
throw new Exception("Unkown Option chosen: " + optionChosen);
}
return;
}
private static class ProteinPair {
FastA p1;
FastA p2;
int matchScore;
double pValue = -1;
int[][] matchMatrix;
Result matchingResult;
ProteinPair(FastA pair1, FastA pair2, boolean doTraceBack) throws Exception {
this.p1 = pair1;
this.p2 = pair2;
calculateMatch(doTraceBack);
}
@Override
public String toString() {
return String
.format("[%s, %s]", p1.AccessionCode, p2.AccessionCode);
}
private void calculateMatch(boolean doTraceBack) throws Exception {
Result r = smithWaterman(p1.Proteinsb, p2.Proteinsb);
if (doTraceBack) {
traceBack(r, p1.ProteinString, p2.ProteinString, r.max_i, r.max_j);
}
this.matchMatrix = r.Matrix;
this.matchScore = r.maxScore;
this.matchingResult = r;
}
public void calculatePValue() throws Exception {
double numBetterMatchesFound = 0;
for (int i = 0; i < NUM_PERMUTED_MATCHES; i++) {
p2.Proteinsb = generatePermutedString(p2.Proteinsb);
Result r = smithWaterman(p1.Proteinsb, p2.Proteinsb);
if (r.maxScore > this.matchScore) {
numBetterMatchesFound++;
}
}
pValue = (numBetterMatchesFound + 1.0) / (NUM_PERMUTED_MATCHES + 1.0);
}
public StringBuilder generatePermutedString(StringBuilder proteinsb) {
char tmp;
int swapIndex = -1;
for (int i = proteinsb.length() - 1; i > 0; i--) {
swapIndex = rand.nextInt(i+1);
tmp = proteinsb.charAt(i);
proteinsb.setCharAt(i, proteinsb.charAt(swapIndex));
proteinsb.setCharAt(swapIndex, tmp);
}
return proteinsb;
}
}
private static class FastA {
public final String Name;
public final String AccessionCode;
public final String ProteinString;
public StringBuilder Proteinsb;
public FastA(StringBuilder data) {
String[] lines = data.toString().split("\n");
Name = lines[0].split("\\|")[2];
AccessionCode = lines[0].split("\\|")[1];
ProteinString = lines[1];
Proteinsb = new StringBuilder(ProteinString);
}
@Override
public String toString() {
return "FastA [AccessionCode=" + AccessionCode + ", ProteinString="
+ ProteinString + "]";
}
}
private static class Result {
public int[][] Matrix;
public int size_i;
public int size_j;
public int max_i;
public int max_j;
public int maxScore;
Stack<Character> x;
Stack<Character> y;
Stack<Character> diff;
Stack<Integer> x_i;
Stack<Integer> y_j;
public Result(int[][] matrix, int size_i, int size_j, int max_i,
int max_j, int maxScore) {
super();
Matrix = matrix;
this.size_i = size_i;
this.size_j = size_j;
this.max_i = max_i;
this.max_j = max_j;
this.maxScore = maxScore;
x = new Stack<Character>();
y = new Stack<Character>();
diff = new Stack<Character>();
x_i = new Stack<Integer>();
y_j = new Stack<Integer>();
}
public void pushX (char c) {
x.push(c);
}
public void pushY (char c) {
y.push(c);
}
public void pushDiff (char c) {
diff.push(c);
}
private String pop60(Stack<Character> s, Stack<Integer> n_i) {
StringBuilder sb = new StringBuilder();
boolean firstNum = true;
int charNum = 0;
if (s.empty()) {
return null;
}
for (int i = 0;i < 60; i++) {
if (s.empty()) {
break;
}
if (n_i != null) {
if (firstNum) {
charNum = n_i.pop();
firstNum = false;
} else {
n_i.pop();
}
}
sb.append(s.pop());
}
String num;
if (n_i != null) {
num = Integer.toString(charNum + 1);
} else {
num = "";
}
return String.format("%1$4s ", num) + sb.toString();
}
public void printOut(String x_Name, String y_Name) {
while (true) {
String s_x = pop60(x, x_i);
String s_y = pop60(y, y_j);
String s_diff = pop60(diff, null);
if (s_x == null)
break;
x_Name = String.format("%1$10s", x_Name);
y_Name = String.format("%1$10s", y_Name);
System.out.println(x_Name + ":" + s_x);
System.out.println(String.format(" %s",s_diff));
System.out.println(y_Name + ":" + s_y);
System.out.println();
}
}
public void printMatrix (String x_Name, String y_Name) {
for (int i = 1; i < Matrix.length; i++) {
if (i==1) {
System.out.print(" ");
for (int k = 0; k < y_Name.length(); k++) {
System.out.print(" " + y_Name.charAt(k));
}
System.out.println();
}
System.out.print(" " + x_Name.charAt(i-1));
for (int j = 1; j < Matrix[i].length; j++) {
System.out.print(String.format("%1$4s", Integer.toString(Matrix[i][j])));
}
System.out.println();
}
}
}
public static List<FastA> readProteins(String fileName) throws IOException {
BufferedReader in = null;
List<FastA> proteins = null;
try {
in = new BufferedReader(new FileReader(fileName));
String line = null;
StringBuilder sb = new StringBuilder();
line = in.readLine();
proteins = new ArrayList<FastA>();
while (true) {
if (line == null)
break;
if (line.trim().charAt(0) == '>') {
sb.append(line);
sb.append("\n");
line = in.readLine();
while (line != null && line.trim().charAt(0) != '>') {
sb.append(line);
line = in.readLine();
}
proteins.add(new FastA(sb));
sb = new StringBuilder();
}
}
} catch (FileNotFoundException e) {
System.out.println("Unable to find " + fileName + e.getMessage());
} catch (IOException e1) {
System.out.println("Exception trying to close file BLOSUM62.txt");
} finally {
in.close();
}
return proteins;
}
public static int substitionMatrix(char x_i, char y_j) {
return BLOSUM62File.matchQuality(x_i, y_j);
}
private static class BLOSUM62File {
// index at which the data for a particular character is stored
private static HashMap<Character, Integer> foo;
private static int[][] blosumValues;
public static void parseBLOSUM62File() {
foo = new HashMap<Character, Integer>();
boolean foundHeaders = false;
int rowCounter = 0;
BufferedReader in = null;
try {
in = new BufferedReader(new FileReader("../data/BLOSUM62.txt"));
String line = null;
while (true) {
line = in.readLine();
if (line == null)
break;
line = line.toUpperCase();
if (line.trim().charAt(0) == '#')
continue;
if (!foundHeaders && line.contains("A")) {
String[] markers = line.trim().split(" +");
blosumValues = new int[markers.length][markers.length];
for (int i = 0; i < markers.length; i++) {
foo.put(markers[i].charAt(0), i);
}
foundHeaders = true;
continue;
}
if (foundHeaders) {
String[] values = line.trim().split(" +");
for (int i = 1; i < values.length; i++) {
blosumValues[rowCounter][i - 1] = Integer
.parseInt(values[i]);
}
rowCounter++;
}
}
} catch (FileNotFoundException e) {
System.out.println("Unable to find ../data/BLOSUM62.txt: "
+ e.getMessage());
} catch (IOException e1) {
System.out
.println("Exception trying to close file BLOSUM62.txt");
}
}
public static void printBLOSUM62File() {
for (int i = 0; i < blosumValues.length; i++) {
for (int j = 0; j < blosumValues[i].length; j++) {
System.out.print(" " + blosumValues[i][j]);
}
System.out.println();
}
}
public static int matchQuality(char x_i, char y_i) {
return blosumValues[foo.get(Character.toUpperCase(x_i))][foo.get(Character.toUpperCase(y_i))];
}
}
}