/*
* Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br)
* Copyright (C) 2008,2009 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com)
*
* For further information check the LICENSE file.
*/
package bio.pih.genoogle.search;
import bio.pih.genoogle.alignment.SubstitutionMatrix;
import bio.pih.genoogle.encoder.SequenceEncoder;
/**
 * Holds a pair of segments, one in the encoded query and one in the encoded target (databank)
 * sequence, after they have been extended to the left and to the right according to their
 * similarity.
 * <p>
 * Instances are created by {@link #doExtension} or directly through the constructor. Equality is
 * based on the four segment bounds and on the identity of the two encoded arrays; the read frame
 * is not part of equality.
 *
 * @author albrecht
 */
public class ExtendSequences {
    private final int[] encodedQuery;
    private final int[] encodedTarget;
    private final int beginQuerySegment;
    private final int endQuerySegment;
    private final int beginTargetSegment;
    private final int endTargetSegment;
    private final int readFrame;

    // Neither read nor written anywhere in this class; kept because it is package-visible.
    String queryExtendedString = null;

    /**
     * @param encodedQuery the encoded query sequence (stored by reference, not copied)
     * @param encodedTarget the encoded target sequence (stored by reference, not copied)
     * @param beginQuerySegment begin position of the segment in the query
     * @param endQuerySegment end position of the segment in the query
     * @param beginTargetSegment begin position of the segment in the target
     * @param endTargetSegment end position of the segment in the target
     * @param readFrame read frame associated with this extension
     */
    public ExtendSequences(int[] encodedQuery, int[] encodedTarget, int beginQuerySegment, int endQuerySegment,
            int beginTargetSegment, int endTargetSegment, int readFrame) {
        this.encodedQuery = encodedQuery;
        this.encodedTarget = encodedTarget;
        this.beginQuerySegment = beginQuerySegment;
        this.endQuerySegment = endQuerySegment;
        this.beginTargetSegment = beginTargetSegment;
        this.endTargetSegment = endTargetSegment;
        this.readFrame = readFrame;
    }

    /** @return the read frame given at construction. */
    public int getReadFrame() {
        return readFrame;
    }

    /** @return the begin position of the segment in the query. */
    public int getBeginQuerySegment() {
        return beginQuerySegment;
    }

    /** @return the end position of the segment in the query. */
    public int getEndQuerySegment() {
        return endQuerySegment;
    }

    /** @return the begin position of the segment in the target. */
    public int getBeginTargetSegment() {
        return beginTargetSegment;
    }

    /** @return the end position of the segment in the target. */
    public int getEndTargetSegment() {
        return endTargetSegment;
    }

    /** @return the encoded query array itself (no defensive copy). */
    public int[] getEncodedQuery() {
        return encodedQuery;
    }

    /** @return the encoded target array itself (no defensive copy). */
    public int[] getEncodedTarget() {
        return encodedTarget;
    }

    /**
     * Extends a pair of segments to the right and then to the left, without gaps: both sequences
     * are walked in lockstep, one position at a time. In each direction the walk keeps the last
     * position at which the running score was at least as high as the best score seen so far, and
     * stops when a sequence boundary is reached or when the running score falls more than
     * {@code dropoff} below the best score.
     *
     * @param encodedQuerySequence the encoded query sequence
     * @param beginQuerySegment begin position of the initial segment in the query
     * @param endQuerySegment end position of the initial segment in the query
     * @param encodedDatabankSequence the encoded databank sequence
     * @param beginDatabankSequenceSegment begin position of the initial segment in the databank sequence
     * @param endDatabankSequenceSegment end position of the initial segment in the databank sequence
     * @param dropoff maximum allowed difference between the best and the current score
     * @param extensionEncoder encoder used to read the value at each position
     * @param substitutionTable scores for symbol pairs; when {@code null}, a position with equal
     *        values scores +1 and any other position scores 0
     * @param readFrame read frame stored unchanged in the result
     *
     * @return {@link ExtendSequences} of the extended sequences.
     */
    public static ExtendSequences doExtension(int[] encodedQuerySequence, int beginQuerySegment, int endQuerySegment,
            int[] encodedDatabankSequence, int beginDatabankSequenceSegment, int endDatabankSequenceSegment,
            int dropoff, SequenceEncoder extensionEncoder, final SubstitutionMatrix substitutionTable, final int readFrame) {
        final int subSequenceLength = extensionEncoder.getSubSequenceLength();
        final int queryLength = SequenceEncoder.getSequenceLength(encodedQuerySequence);
        final int databankLength = SequenceEncoder.getSequenceLength(encodedDatabankSequence);

        // Right extension: positions end + 1, end + 2, ... while both stay below their lengths.
        final int maxRightSteps = Math.min(queryLength - endQuerySegment - 1,
                databankLength - endDatabankSequenceSegment - 1);
        final int rightSteps = bestExtensionSteps(encodedQuerySequence, endQuerySegment, encodedDatabankSequence,
                endDatabankSequenceSegment, 1, maxRightSteps, dropoff, extensionEncoder, subSequenceLength,
                substitutionTable);

        // Left extension: positions begin - 1, begin - 2, ... while both stay at or above zero.
        final int maxLeftSteps = Math.min(beginQuerySegment, beginDatabankSequenceSegment);
        final int leftSteps = bestExtensionSteps(encodedQuerySequence, beginQuerySegment, encodedDatabankSequence,
                beginDatabankSequenceSegment, -1, maxLeftSteps, dropoff, extensionEncoder, subSequenceLength,
                substitutionTable);

        // Both sequences advance together, so one step count gives the best position in each.
        return new ExtendSequences(encodedQuerySequence, encodedDatabankSequence,
                beginQuerySegment - leftSteps, endQuerySegment + rightSteps,
                beginDatabankSequenceSegment - leftSteps, endDatabankSequenceSegment + rightSteps, readFrame);
    }

    /**
     * Walks up to {@code maxSteps} positions away from the anchors in the given direction and
     * returns how many steps away the best-scoring position is (0 when nothing was extended).
     *
     * @param direction +1 to walk right, -1 to walk left
     */
    private static int bestExtensionSteps(int[] encodedQuerySequence, int queryAnchor,
            int[] encodedDatabankSequence, int databankAnchor, int direction, int maxSteps, int dropoff,
            SequenceEncoder extensionEncoder, int subSequenceLength, SubstitutionMatrix substitutionTable) {
        int score = 0;
        int bestScore = 0;
        int bestSteps = 0;

        for (int steps = 1; steps <= maxSteps; steps++) {
            int queryPos = queryAnchor + direction * steps;
            int databankPos = databankAnchor + direction * steps;
            int queryValue = extensionEncoder.getValueAtPos(encodedQuerySequence, queryPos, subSequenceLength);
            int databankValue = extensionEncoder.getValueAtPos(encodedDatabankSequence, databankPos, subSequenceLength);

            if (substitutionTable == null) {
                // NOTE(review): a mismatch is not penalized here, so the score never decreases and,
                // for a non-negative dropoff, the drop-off check below cannot stop the walk --
                // confirm this is intended.
                if (queryValue == databankValue) {
                    score++;
                }
            } else {
                char a = extensionEncoder.getSymbolFromBits(queryValue);
                char b = extensionEncoder.getSymbolFromBits(databankValue);
                score += substitutionTable.getValue(a, b);
            }

            // ">=" keeps the farthest position among equally good ones.
            if (score >= bestScore) {
                bestScore = score;
                bestSteps = steps;
            }
            if (bestScore - score > dropoff) {
                break;
            }
        }
        return bestSteps;
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: built from the four segment bounds and
     * the identity of the two encoded arrays (which {@code equals} compares by reference).
     */
    @Override
    public int hashCode() {
        int result = 17;
        result = 31 * result + beginQuerySegment;
        result = 31 * result + endQuerySegment;
        result = 31 * result + beginTargetSegment;
        result = 31 * result + endTargetSegment;
        result = 31 * result + System.identityHashCode(encodedQuery);
        result = 31 * result + System.identityHashCode(encodedTarget);
        return result;
    }

    /**
     * Two instances are equal when their segment bounds match and they refer to the very same
     * encoded query and target arrays. The read frame is not compared.
     */
    @Override
    public boolean equals(Object anObject) {
        if (this == anObject) {
            return true;
        }
        if (!(anObject instanceof ExtendSequences)) {
            return false;
        }

        ExtendSequences other = (ExtendSequences) anObject;
        // NOTE(review): readFrame is deliberately left out to preserve the existing equality
        // semantics -- confirm whether it should take part in the comparison.
        return this.getBeginQuerySegment() == other.getBeginQuerySegment()
                && this.getBeginTargetSegment() == other.getBeginTargetSegment()
                && this.getEndQuerySegment() == other.getEndQuerySegment()
                && this.getEndTargetSegment() == other.getEndTargetSegment()
                && this.encodedQuery == other.getEncodedQuery()
                && this.encodedTarget == other.getEncodedTarget();
    }
}