/*
* Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br)
* Copyright (C) 2008,2009 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com)
*
* For further information check the LICENSE file.
*/
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package bio.pih.genoogle.alignment;
/*
* Created on 05.09.2005
*
*/
/**
* Smith and Waterman developed an efficient dynamic programing algorithm to perform local sequence
* alignments, which returns the most conserved region of two sequences (longes common substring
* with modifications). This algorithm is performed by the method <code>pairwiseAlignment</code> of
* this class. It uses affine gap penalties if and only if the expenses of a delete or insert
* operation are unequal to the expenses of gap extension. This uses significantly more memory (four
* times as much) and increases the runtime if swaping is performed.
*
* @author Felipe Albrecht
* @author Andreas Dräger
* @author Gero Greiner
* @since 1.5
*/
public class StringGenoogleSmithWaterman extends GenoogleSmithWaterman {
private static final long serialVersionUID = 2884980510887845616L;
/**
* Constructs the new SmithWaterman alignment object. Alignments are only performed, if the
* alphabet of the given <code>SubstitutionMatrix</code> equals the alpabet of both the query
* and the target <code>Sequence</code>. The alignment parameters here are expenses and not
* scores as they are in the <code>NeedlemanWunsch</code> object. scores are just given by
* multipliing the expenses with <code>(-1)</code>. For example you could use parameters like
* "-2, 5, 3, 3, 0". If the expenses for gap extension are equal to the cost of starting a gap
* (delete or insert), no affine gap penalties are used, which saves memory.
*
* @param match
* expenses for a match
* @param replace
* expenses for a replace operation
* @param insert
* expenses for a gap opening in the query sequence
* @param delete
* expenses for a gap opening in the target sequence
* @param gapExtend
* expenses for the extension of a gap which was started earlier.y
*/
public StringGenoogleSmithWaterman(int match, int replace, int insert, int delete, int gapExtend) {
super(match, replace, insert, delete, gapExtend);
}
int maxI = 0, maxJ = 0, queryStart = 0, targetStart = 0;
/**
* @param query
* @param subject
* @return the score of the alignment
*/
public int pairwiseAlignment(String query, String subject) {
int[][] scoreMatrix = new int[query.length() + 1][subject.length() + 1];
int builderLength = (int) (Math.max(query.length(), subject.length()) * 1.20);
StringBuilder pathBuilder = new StringBuilder(builderLength);
StringBuilder[] alignBuilder = new StringBuilder[2];
alignBuilder[0] = new StringBuilder(builderLength);
alignBuilder[1] = new StringBuilder(builderLength);
/*
* Use affine gap panalties.
*/
if ((gapExt != delete) || (gapExt != insert)) {
affinedGapAlignment(query, subject, scoreMatrix, pathBuilder, alignBuilder);
/*
* No affine gap penalties to save memory.
*/
} else {
nonAfinedGapAlignment(query, subject, scoreMatrix, pathBuilder, alignBuilder);
}
this.score = scoreMatrix[maxI][maxJ];
this.path = pathBuilder.reverse().toString();
this.align[0] = alignBuilder[0].reverse().toString();
this.align[1] = alignBuilder[1].reverse().toString();
return this.score;
}
private void nonAfinedGapAlignment(String query, String subject, int[][] scoreMatrix,
StringBuilder pathBuilder, StringBuilder[] alignBuilder) {
int i;
int j;
int queryLength = query.length();
int subjectLength = subject.length();
int k = 3;
for (i = 0; i <= queryLength; i++)
scoreMatrix[i][0] = 0;
for (j = 0; j <= subjectLength; j++)
scoreMatrix[0][j] = 0;
for (i = 1; i <= queryLength; i++) {
int from = Math.max(1, i - k);
int to = Math.min(subjectLength, i + k);
for (j = from; j <= to; j++) {
scoreMatrix[i][j] = max(0, scoreMatrix[i - 1][j] + delete, scoreMatrix[i][j - 1]
+ insert, scoreMatrix[i - 1][j - 1] + (query.charAt(i-1)==subject.charAt(j-1)?match:replace));
if (scoreMatrix[i][j] > scoreMatrix[maxI][maxJ]) {
maxI = i;
maxJ = j;
}
}
}
/*
* Here starts the traceback for non-affine gap penalities
*/
backtrace(query, subject, scoreMatrix, pathBuilder, alignBuilder);
}
private void affinedGapAlignment(String query, String subject, int[][] scoreMatrix,
StringBuilder pathBuilder, StringBuilder[] alignBuilder) {
int i;
int j;
int[][] E = new int[query.length() + 1][subject.length() + 1]; // Inserts
int[][] F = new int[query.length() + 1][subject.length() + 1]; // Deletes
scoreMatrix[0][0] = 0;
E[0][0] = F[0][0] = Integer.MIN_VALUE;
for (i = 1; i <= query.length(); i++) {
scoreMatrix[i][0] = F[i][0] = 0;
E[i][0] = Integer.MIN_VALUE;
}
for (j = 1; j <= subject.length(); j++) {
scoreMatrix[0][j] = E[0][j] = 0;
F[0][j] = Integer.MIN_VALUE;
}
for (i = 1; i <= query.length(); i++)
for (j = 1; j <= subject.length(); j++) {
E[i][j] = Math.max(E[i][j - 1], scoreMatrix[i][j - 1] + insert) + gapExt;
F[i][j] = Math.max(F[i - 1][j], scoreMatrix[i - 1][j] + delete) + gapExt;
scoreMatrix[i][j] = max(0, E[i][j],
F[i][j],
scoreMatrix[i - 1][j - 1] + (query.charAt(i-1)==subject.charAt(j-1)?match:replace));
if (scoreMatrix[i][j] > scoreMatrix[maxI][maxJ]) {
maxI = i;
maxJ = j;
}
}
/*
* Here starts the traceback for affine gap penalities
*/
boolean[] gap_extend = { false, false };
j = maxJ;
for (i = maxI; i > 0;) {
do {
// only Deletes or Inserts or Replaces possible.
// That's not what we want to have.
if (scoreMatrix[i][j] == 0) {
queryStart = i;
targetStart = j;
i = j = 0;
// Match/Replace
} else if (((scoreMatrix[i][j]
- (scoreMatrix[i - 1][j - 1] + (query.charAt(i-1)==subject.charAt(j-1)?match:replace))) == 0)
&& !(gap_extend[0] || gap_extend[1])) {
if (query.charAt(i-1) == subject.charAt(j-1)) {
pathBuilder.append('|');
identitySize++;
} else {
pathBuilder.append(' ');
}
alignBuilder[0].append(query.charAt(i-1));
alignBuilder[1].append(subject.charAt(j-1));
i--;
j--;
// Insert || finish gap if extended gap is
// opened
} else if (scoreMatrix[i][j] == E[i][j] || gap_extend[0]) {
// check if gap has been extended or freshly
// opened
gap_extend[0] = E[i][j] - (scoreMatrix[i][j - 1] + insert + gapExt) == 0;
alignBuilder[0].append('-');
alignBuilder[1].append(subject.charAt(j-1));
pathBuilder.append(' ');
j--;
// Delete || finish gap if extended gap is
// opened
} else {
// check if gap has been extended or freshly
// opened
gap_extend[1] = F[i][j] - (scoreMatrix[i - 1][j] + delete + gapExt) == 0;
alignBuilder[0].append(query.charAt(i-1));
alignBuilder[1].append('-');
pathBuilder.append(' ');
i--;
}
} while (j > 0);
}
}
private void backtrace(String query, String subject, int[][] scoreMatrix,
StringBuilder pathBuilder, StringBuilder[] alignBuilder) {
int i;
int j;
j = maxJ;
for (i = maxI; i > 0;) {
do {
// only Deletes or Inserts or Replaces possible.
// That's not what
// we want to have.
if (scoreMatrix[i][j] == 0) {
queryStart = i;
targetStart = j;
i = j = 0;
// Match/Replace
} else {
char queryChar = query.charAt(i-1);
char subjectChar = subject.charAt(j-1);
if (scoreMatrix[i][j]
- (scoreMatrix[i - 1][j - 1] + (queryChar==subjectChar?match:replace)) == 0) {
if (queryChar == subjectChar) {
pathBuilder.append('|');
identitySize++;
} else {
pathBuilder.append(' ');
}
alignBuilder[0].append(queryChar);
alignBuilder[1].append(subjectChar);
i--;
j--;
// Insert
} else if (scoreMatrix[i][j] - (scoreMatrix[i][j - 1] + insert) == 0) {
alignBuilder[0].append('-');
alignBuilder[1].append(subjectChar);
pathBuilder.append(' ');
j--;
// Delete
} else {
alignBuilder[0].append(queryChar);
alignBuilder[1].append('-');
pathBuilder.append(' ');
i--;
}
}
} while (j > 0);
}
}
@Override
public String getQueryAligned() {
return align[0];
}
@Override
public String getTargetAligned() {
return align[1];
}
@Override
public String getPath() {
return path;
}
@Override
public int getQueryStart() {
return queryStart + 1;
}
@Override
public int getQueryEnd() {
return maxI;
}
@Override
public int getTargetStart() {
return targetStart + 1;
}
@Override
public int getTargetEnd() {
return maxJ;
}
@Override
public int getScore() {
return score;
}
@Override
public int getIdentitySize() {
return identitySize;
}
}