package fr.orsay.lri.varna.models.treealign;
import java.util.ArrayList;
/**
*
*
* @author Raphael Champeimont
*/
public class ExampleDistance3 implements TreeAlignLabelDistanceSymmetric<RNANodeValue2> {
public double f(RNANodeValue2 v1, RNANodeValue2 v2) {
if (v1 == null) {
if (v2 == null) {
return 0;
} else if (!v2.isSingleNode()) { // v2 is a list of bases
// We insert all bases, with a cost of 1 for each base.
return v2.getNodes().size();
} else { // v2 is a single node
return 2;
}
} else if (!v1.isSingleNode()) { // v1 is a list of bases
if (v2 == null) {
return v1.getNodes().size();
} else if (!v2.isSingleNode()) { // v2 is a list of bases
// We compute the sequence distance
return alignSequenceNodes(v1, v2).getDistance();
} else { // v2 is a single node
return 2 + v1.getNodes().size();
}
} else { // v1 is a single node
// all the same as when v1 == null
if (v2 == null) {
return 2;
} else if (!v2.isSingleNode()) { // v2 is a list of bases
return 2 + v2.getNodes().size();
} else { // v2 is a single node
String l1 = v1.getNode().getLeftNucleotide();
String r1 = v1.getNode().getRightNucleotide();
String l2 = v2.getNode().getLeftNucleotide();
String r2 = v2.getNode().getRightNucleotide();
// We have cost(subst((x,y) to (x',y'))) = 1
// when x != x' and y != y'.
// It means it is less than substituting 2 non-paired bases
return (!l1.equals(l2) ? 0.5 : 0)
+ (!r1.equals(r2) ? 0.5 : 0);
}
}
}
public class SequenceAlignResult {
private double distance;
private int[][] alignment;
public double getDistance() {
return distance;
}
public void setDistance(double distance) {
this.distance = distance;
}
/** The result array is a matrix of height 2
* and width at most length(sequence A) + length(sequence B).
* with result[0] is the alignment for A
* and result[1] the alignment for B.
* The alignment consists int the indexes of the original
* bases positions, with -1 when there is no match.
*/
public int[][] getAlignment() {
return alignment;
}
public void setAlignment(int[][] alignment) {
this.alignment = alignment;
}
}
/**
* Align two sequences contained in nodes.
* Both nodes have to be non-single nodes, otherwise an
* RNANodeValue2WrongTypeException exception will be thrown.
*/
public SequenceAlignResult alignSequenceNodes(RNANodeValue2 v1, RNANodeValue2 v2) {
char[] A = v1.computeSequence();
char[] B = v2.computeSequence();
return alignSequences(A, B);
}
/**
* Align sequences using the Needleman-Wunsch algorithm.
* Time: O(A.length * B.length)
* Space: O(A.length * B.length)
* Space used by the returned object: O(A.length + B.length)
*/
public SequenceAlignResult alignSequences(char[] A, char[] B) {
SequenceAlignResult result = new SequenceAlignResult();
final int la = A.length;
final int lb = B.length;
double[][] F = new double[la+1][lb+1];
int[][] decisions = new int[la+1][lb+1];
final double d = 1; // insertion/deletion cost
final double substCost = 1; // substitution cost
for (int i=0; i<=la; i++)
F[i][0] = d*i;
for (int j=0; j<=lb; j++)
F[0][j] = d*j;
for (int i=1; i<=la; i++)
for (int j=1; j<=lb; j++)
{
double min;
int decision;
double match = F[i-1][j-1] + (A[i-1] == B[j-1] ? 0 : substCost);
double delete = F[i-1][j] + d;
if (match < delete) {
decision = 1;
min = match;
} else {
decision = 2;
min = delete;
}
double insert = F[i][j-1] + d;
if (insert < min) {
decision = 3;
min = insert;
}
F[i][j] = min;
decisions[i][j] = decision;
}
result.setDistance(F[la][lb]);
int[][] alignment = computeAlignment(F, decisions, A, B);
result.setAlignment(alignment);
return result;
}
private int[][] computeAlignment(double[][] F, int[][] decisions, char[] A, char[] B) {
// At worst the alignment will be of length (A.length + B.length)
ArrayList<Integer> AlignmentA = new ArrayList<Integer>(A.length + B.length);
ArrayList<Integer> AlignmentB = new ArrayList<Integer>(A.length + B.length);
int i = A.length;
int j = B.length;
while (i > 0 && j > 0)
{
int decision = decisions[i][j];
switch (decision) {
case 1:
AlignmentA.add(i-1);
AlignmentB.add(j-1);
i = i - 1;
j = j - 1;
break;
case 2:
AlignmentA.add(i-1);
AlignmentB.add(-1);
i = i - 1;
break;
case 3:
AlignmentA.add(-1);
AlignmentB.add(j-1);
j = j - 1;
break;
default:
throw (new Error("Bug in ExampleDistance3: decision = " + decision));
}
}
while (i > 0)
{
AlignmentA.add(i-1);
AlignmentB.add(-1);
i = i - 1;
}
while (j > 0)
{
AlignmentA.add(-1);
AlignmentB.add(j-1);
j = j - 1;
}
// Convert the ArrayLists to the right format:
// We need to reverse the list and change the numbering of
int l = AlignmentA.size();
int[][] result = new int[2][l];
for (i=0; i<l; i++) {
result[0][i] = AlignmentA.get(l-1-i);
result[1][i] = AlignmentB.get(l-1-i);
}
return result;
}
}