import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Random; import java.util.Stack; public class h2 { public static final int GAP_PENALTY = -4; public static final int NUM_PERMUTED_MATCHES = 10000; public final static Random rand = new Random(); public static void main(String[] args) throws Exception { BLOSUM62File.parseBLOSUM62File(); //BLOSUM62File.printBLOSUM62File(); final String s = "deadly"; final String t = "ddgearlyk"; // final String s = "MELLSLCSWFAAATTYDADFYDDP"; // final String t = "MSNWTATSSDSTS"; // put custom strings into protein format so that I can use the protein pair class final String ps = String.format(">sp|%s|Blah blah\n%s", s, s); final String pt = String.format(">sp|%s|Blah blah\n%s", t, t); ProteinPair simple = new ProteinPair(new FastA(new StringBuilder(ps)), new FastA(new StringBuilder(pt)), true); simple.calculatePValue(); List<FastA> proteins = readProteins("../data/proteins.fasta"); List<ProteinPair> proteinPairs = new ArrayList<ProteinPair>(); // print simple string results simple.matchingResult.printOut(s, t); System.out.println(simple.matchScore + " " + simple.pValue); simple.matchingResult.printMatrix(s, t); for (int i = 0; i < proteins.size(); i++) { for (int j = i + 1; j < proteins.size(); j++) { ProteinPair p = new ProteinPair(proteins.get(i), proteins.get(j), true); if (p.p1.AccessionCode.equals("P15172") && p.p2.AccessionCode.endsWith("Q10574")) { p.calculatePValue(); } if (p.p1.AccessionCode.equals("P15172") && p.p2.AccessionCode.endsWith("O95363")) { p.calculatePValue(); } proteinPairs.add(p); } } // print out results for all protein pairs generated for (ProteinPair p : proteinPairs) { System.out.println(String.format("%s, %s, %d", p.p1.AccessionCode, p.p2.AccessionCode, p.matchScore)); p.matchingResult.printOut(p.p1.AccessionCode, p.p2.AccessionCode); } s.length(); } public static Result smithWaterman(StringBuilder s, StringBuilder t) throws Exception { int[][] M = new int[s.length() + 1][t.length() + 1]; int maxResult = 0; int max_i = 0; int max_j = 0; for (int i = 1; i < M.length; i++) { for (int j = 1; j < M[i].length; j++) { int[] optionsToChoose = new int[4]; optionsToChoose[0] = 0; optionsToChoose[1] = M[i - 1][j - 1] + substitionMatrix(s.charAt(i-1), t.charAt(j-1)); optionsToChoose[2] = M[i - 1][j] + GAP_PENALTY; optionsToChoose[3] = M[i][j - 1] + GAP_PENALTY; int optionChosen = 0; for (int k = 1; k < optionsToChoose.length; k++) { if (optionsToChoose[k] > optionsToChoose[optionChosen]) { optionChosen = k; } } M[i][j] = optionsToChoose[optionChosen]; if (M[i][j] > maxResult) { maxResult = M[i][j]; max_i = i; max_j = j; } } } return new Result(M, M.length, M[0].length, max_i, max_j, maxResult); } public static void traceBack(Result r, String s, String t, int i, int j) throws Exception { int[][] M = r.Matrix; if (M[i][j] == 0) { return; } int[] optionsToChoose = new int[4]; int substMatrixResult = substitionMatrix(s.charAt(i-1), t.charAt(j-1)); optionsToChoose[0] = 0; optionsToChoose[1] = M[i - 1][j - 1] + substMatrixResult; optionsToChoose[2] = M[i - 1][j] + GAP_PENALTY; optionsToChoose[3] = M[i][j - 1] + GAP_PENALTY; int optionChosen = 0; for (int k = 1; k < optionsToChoose.length; k++) { if (optionsToChoose[k] > optionsToChoose[optionChosen]) { optionChosen = k; } } M[i][j] = optionsToChoose[optionChosen]; switch (optionChosen) { case 0: return; case 1: // using a stack to store characters that are part of the match and delta r.pushX(s.charAt(i-1)); r.x_i.push(i-1); r.pushY(t.charAt(j-1)); r.y_j.push(j-1); if (s.charAt(i-1) == t.charAt(j-1)) { r.pushDiff(s.charAt(i-1)); } else { if(substMatrixResult > 0) { r.pushDiff('+'); } else { r.pushDiff(' '); } } traceBack(r, s, t, i-1, j-1); break; case 2: r.pushX(s.charAt(i-1)); r.x_i.push(i-1); r.pushY('-'); r.y_j.push(j-1); r.pushDiff(' '); traceBack(r, s, t, i-1, j); break; case 3: r.pushX('-'); r.x_i.push(i-1); r.pushY(t.charAt(j-1)); r.y_j.push(j-1); r.pushDiff(' '); traceBack(r, s, t, i, j-1); break; default: throw new Exception("Unkown Option chosen: " + optionChosen); } return; } private static class ProteinPair { FastA p1; FastA p2; int matchScore; double pValue = -1; int[][] matchMatrix; Result matchingResult; ProteinPair(FastA pair1, FastA pair2, boolean doTraceBack) throws Exception { this.p1 = pair1; this.p2 = pair2; calculateMatch(doTraceBack); } @Override public String toString() { return String .format("[%s, %s]", p1.AccessionCode, p2.AccessionCode); } private void calculateMatch(boolean doTraceBack) throws Exception { Result r = smithWaterman(p1.Proteinsb, p2.Proteinsb); if (doTraceBack) { traceBack(r, p1.ProteinString, p2.ProteinString, r.max_i, r.max_j); } this.matchMatrix = r.Matrix; this.matchScore = r.maxScore; this.matchingResult = r; } public void calculatePValue() throws Exception { double numBetterMatchesFound = 0; for (int i = 0; i < NUM_PERMUTED_MATCHES; i++) { p2.Proteinsb = generatePermutedString(p2.Proteinsb); Result r = smithWaterman(p1.Proteinsb, p2.Proteinsb); if (r.maxScore > this.matchScore) { numBetterMatchesFound++; } } pValue = (numBetterMatchesFound + 1.0) / (NUM_PERMUTED_MATCHES + 1.0); } public StringBuilder generatePermutedString(StringBuilder proteinsb) { char tmp; int swapIndex = -1; for (int i = proteinsb.length() - 1; i > 0; i--) { swapIndex = rand.nextInt(i+1); tmp = proteinsb.charAt(i); proteinsb.setCharAt(i, proteinsb.charAt(swapIndex)); proteinsb.setCharAt(swapIndex, tmp); } return proteinsb; } } private static class FastA { public final String Name; public final String AccessionCode; public final String ProteinString; public StringBuilder Proteinsb; public FastA(StringBuilder data) { String[] lines = data.toString().split("\n"); Name = lines[0].split("\\|")[2]; AccessionCode = lines[0].split("\\|")[1]; ProteinString = lines[1]; Proteinsb = new StringBuilder(ProteinString); } @Override public String toString() { return "FastA [AccessionCode=" + AccessionCode + ", ProteinString=" + ProteinString + "]"; } } private static class Result { public int[][] Matrix; public int size_i; public int size_j; public int max_i; public int max_j; public int maxScore; Stack<Character> x; Stack<Character> y; Stack<Character> diff; Stack<Integer> x_i; Stack<Integer> y_j; public Result(int[][] matrix, int size_i, int size_j, int max_i, int max_j, int maxScore) { super(); Matrix = matrix; this.size_i = size_i; this.size_j = size_j; this.max_i = max_i; this.max_j = max_j; this.maxScore = maxScore; x = new Stack<Character>(); y = new Stack<Character>(); diff = new Stack<Character>(); x_i = new Stack<Integer>(); y_j = new Stack<Integer>(); } public void pushX (char c) { x.push(c); } public void pushY (char c) { y.push(c); } public void pushDiff (char c) { diff.push(c); } private String pop60(Stack<Character> s, Stack<Integer> n_i) { StringBuilder sb = new StringBuilder(); boolean firstNum = true; int charNum = 0; if (s.empty()) { return null; } for (int i = 0;i < 60; i++) { if (s.empty()) { break; } if (n_i != null) { if (firstNum) { charNum = n_i.pop(); firstNum = false; } else { n_i.pop(); } } sb.append(s.pop()); } String num; if (n_i != null) { num = Integer.toString(charNum + 1); } else { num = ""; } return String.format("%1$4s ", num) + sb.toString(); } public void printOut(String x_Name, String y_Name) { while (true) { String s_x = pop60(x, x_i); String s_y = pop60(y, y_j); String s_diff = pop60(diff, null); if (s_x == null) break; x_Name = String.format("%1$10s", x_Name); y_Name = String.format("%1$10s", y_Name); System.out.println(x_Name + ":" + s_x); System.out.println(String.format(" %s",s_diff)); System.out.println(y_Name + ":" + s_y); System.out.println(); } } public void printMatrix (String x_Name, String y_Name) { for (int i = 1; i < Matrix.length; i++) { if (i==1) { System.out.print(" "); for (int k = 0; k < y_Name.length(); k++) { System.out.print(" " + y_Name.charAt(k)); } System.out.println(); } System.out.print(" " + x_Name.charAt(i-1)); for (int j = 1; j < Matrix[i].length; j++) { System.out.print(String.format("%1$4s", Integer.toString(Matrix[i][j]))); } System.out.println(); } } } public static List<FastA> readProteins(String fileName) throws IOException { BufferedReader in = null; List<FastA> proteins = null; try { in = new BufferedReader(new FileReader(fileName)); String line = null; StringBuilder sb = new StringBuilder(); line = in.readLine(); proteins = new ArrayList<FastA>(); while (true) { if (line == null) break; if (line.trim().charAt(0) == '>') { sb.append(line); sb.append("\n"); line = in.readLine(); while (line != null && line.trim().charAt(0) != '>') { sb.append(line); line = in.readLine(); } proteins.add(new FastA(sb)); sb = new StringBuilder(); } } } catch (FileNotFoundException e) { System.out.println("Unable to find " + fileName + e.getMessage()); } catch (IOException e1) { System.out.println("Exception trying to close file BLOSUM62.txt"); } finally { in.close(); } return proteins; } public static int substitionMatrix(char x_i, char y_j) { return BLOSUM62File.matchQuality(x_i, y_j); } private static class BLOSUM62File { // index at which the data for a particular character is stored private static HashMap<Character, Integer> foo; private static int[][] blosumValues; public static void parseBLOSUM62File() { foo = new HashMap<Character, Integer>(); boolean foundHeaders = false; int rowCounter = 0; BufferedReader in = null; try { in = new BufferedReader(new FileReader("../data/BLOSUM62.txt")); String line = null; while (true) { line = in.readLine(); if (line == null) break; line = line.toUpperCase(); if (line.trim().charAt(0) == '#') continue; if (!foundHeaders && line.contains("A")) { String[] markers = line.trim().split(" +"); blosumValues = new int[markers.length][markers.length]; for (int i = 0; i < markers.length; i++) { foo.put(markers[i].charAt(0), i); } foundHeaders = true; continue; } if (foundHeaders) { String[] values = line.trim().split(" +"); for (int i = 1; i < values.length; i++) { blosumValues[rowCounter][i - 1] = Integer .parseInt(values[i]); } rowCounter++; } } } catch (FileNotFoundException e) { System.out.println("Unable to find ../data/BLOSUM62.txt: " + e.getMessage()); } catch (IOException e1) { System.out .println("Exception trying to close file BLOSUM62.txt"); } } public static void printBLOSUM62File() { for (int i = 0; i < blosumValues.length; i++) { for (int j = 0; j < blosumValues[i].length; j++) { System.out.print(" " + blosumValues[i][j]); } System.out.println(); } } public static int matchQuality(char x_i, char y_i) { return blosumValues[foo.get(Character.toUpperCase(x_i))][foo.get(Character.toUpperCase(y_i))]; } } }