/*
* Copyright 2015 MiLaboratory.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.milaboratory.core.alignment;
import com.milaboratory.core.Range;
import com.milaboratory.core.io.binary.AlignmentSerializer;
import com.milaboratory.core.mutations.Mutation;
import com.milaboratory.core.mutations.Mutations;
import com.milaboratory.core.sequence.Alphabet;
import com.milaboratory.core.sequence.Sequence;
import com.milaboratory.primitivio.annotations.Serializable;
import com.milaboratory.util.BitArray;
import com.milaboratory.util.IntArrayList;
import java.util.ArrayList;
import java.util.List;
import static com.milaboratory.core.mutations.Mutation.*;
/**
* @author Dmitry Bolotin
* @author Stanislav Poslavsky
*/
@Serializable(by = AlignmentSerializer.class)
public final class Alignment<S extends Sequence<S>> implements java.io.Serializable {
/**
* Initial sequence. (upper sequence in alignment; sequence1)
*/
final S sequence1;
/**
* Mutations
*/
final Mutations<S> mutations;
/**
* Range in initial sequence (sequence1)
*/
final Range sequence1Range;
/**
* Range in mutated sequence (sequence2)
*/
final Range sequence2Range;
/**
* Alignment score
*/
final float score;
public Alignment(S sequence1, Mutations<S> mutations, float score) {
this(sequence1, mutations, new Range(0, sequence1.size()),
new Range(0, sequence1.size() + mutations.getLengthDelta()),
score);
}
public Alignment(S sequence1, Mutations<S> mutations, AlignmentScoring<S> scoring) {
this(sequence1, mutations, new Range(0, sequence1.size()),
new Range(0, sequence1.size() + mutations.getLengthDelta()),
scoring);
}
public Alignment(S sequence1, Mutations<S> mutations,
Range sequence1Range, Range sequence2Range,
AlignmentScoring<S> scoring) {
this(sequence1, mutations, sequence1Range, sequence2Range,
AlignmentUtils.calculateScore(sequence1, sequence1Range, mutations, scoring));
}
public Alignment(S sequence1, Mutations<S> mutations,
Range sequence1Range, Range sequence2Range,
float score) {
if (!mutations.isEmpty()) {
if (!mutations.isCompatibleWith(sequence1)
|| !sequence1Range.contains(mutations.getMutatedRange())
|| sequence1Range.length() + mutations.getLengthDelta() != sequence2Range.length())
throw new IllegalArgumentException("Not compatible arguments: muts: " + mutations + " range1: " + sequence1Range + " range2: " + sequence2Range);
} else if (sequence1Range.length() != sequence2Range.length())
throw new IllegalArgumentException("Not compatible arguments.");
this.sequence1 = sequence1;
this.mutations = mutations;
this.sequence1Range = sequence1Range;
this.sequence2Range = sequence2Range;
this.score = score;
}
/**
* Calculates score for this alignment using another scoring.
*
* @param scoring scoring
* @return alignment score
*/
public int calculateScore(AlignmentScoring<S> scoring) {
return AlignmentUtils.calculateScore(sequence1, sequence1Range, mutations, scoring);
}
/**
* Returns alignment iterator.
*
* @return alignment iterator
*/
public AlignmentIteratorForward<S> forwardIterator() {
return new AlignmentIteratorForward<>(mutations, sequence1Range, sequence2Range.getFrom());
}
/**
* Returns reverse alignment iterator.
*
* @return reverse alignment iterator
*/
public AlignmentIteratorReverse<S> reverseIterator() {
return new AlignmentIteratorReverse<>(mutations, sequence1Range, sequence2Range.getTo());
}
/**
* Return initial sequence (upper sequence in alignment).
*
* @return initial sequence (upper sequence in alignment)
*/
public S getSequence1() {
return sequence1;
}
/**
* Returns mutations in absolute (global) {@code sequence1} coordinates.
*
* @return mutations in absolute (global) {@code sequence1} coordinates
*/
public Mutations<S> getAbsoluteMutations() {
return mutations;
}
/**
* Returns mutations in local coordinates, relative to {@code sequence1range}.
*
* @return mutations in local coordinates, relative to {@code sequence1range}
*/
public Mutations<S> getRelativeMutations() {
return mutations.move(-sequence1Range.getLower());
}
/**
* Returns aligned range of sequence1.
*
* @return aligned range of sequence1
*/
public Range getSequence1Range() {
return sequence1Range;
}
/**
* Returns aligned range of sequence2.
*
* @return aligned range of sequence2
*/
public Range getSequence2Range() {
return sequence2Range;
}
///**
// * Extracts sub-alignment: cut alignment corresponding to certain sequence range in seq1.
// *
// * @param range range in seq1
// * @return sub-alignment
// */
//public Alignment<S> subAlignment(Range range, AlignmentScoring<S> scoring) {
// return new Alignment<S>(sequence1, mutations.extractAbsoluteMutationsForRange(range), range,
// con)
//}
/**
* Converts specified position from sequence1 coordinates to sequence2 coordinates. If position is out of aligned
* range of sequence1, returns -1. If letter at specified position in sequence1 is removed in sequence2, than
* returns {@code -2 - p}, where {@code p} is a position of previous letter in sequence2.
*
* @param positionInSeq1 position in sequence1
* @return position in coordinates of sequence2, or -1 if specified position is out of aligned range of sequence1,
* or if letter at specified position in sequence1 is removed in sequence2 --- {@code -2 - p} where {@code p} is a
* position of next letter in sequence2
*/
public int convertToSeq2Position(int positionInSeq1) {
if (!sequence1Range.containsBoundary(positionInSeq1))
return -1;
int p = mutations.convertToSeq2Position(positionInSeq1);
if (p < 0)
return -2 - (~p + sequence2Range.getFrom() - sequence1Range.getFrom());
return p + sequence2Range.getFrom() - sequence1Range.getFrom();
}
/**
* Converts specified position from sequence2 coordinates to sequence1 coordinates. If position is out of aligned
* range of sequence2, returns -1. If letter at specified position in sequence2 is removed in sequence1, than
* returns {@code -2 - p}, where {@code p} is a position of previous letter in sequence2.
*
* @param positionInSeq2 position in sequence2
* @return position in coordinates of sequence1, or -1 if specified position is out of aligned range of sequence2,
* or if letter at specified position in sequence2 is removed in sequence2 --- {@code -2 - p} where {@code p} is a
* position of next letter in sequence1
*/
public int convertToSeq1Position(int positionInSeq2) {
if (!sequence2Range.containsBoundary(positionInSeq2))
return -1;
positionInSeq2 += -sequence2Range.getFrom() + sequence1Range.getFrom();
int p = mutations.convertToSeq1Position(positionInSeq2);
return p < 0 ? -2 - ~p : p;
}
/**
* Converts range in sequence2 to range in sequence1, or returns null if input range is not fully covered by
* alignment
*
* @param rangeInSeq2 range in sequence 2
* @return range in sequence1 or null if rangeInSeq2 is not fully covered by alignment
*/
public Range convertToSeq1Range(Range rangeInSeq2) {
int from = aabs(convertToSeq1Position(rangeInSeq2.getFrom()));
int to = aabs(convertToSeq1Position(rangeInSeq2.getTo()));
if (from == -1 || to == -1)
return null;
return new Range(from, to);
}
/**
* Converts range in sequence1 to range in sequence2, or returns null if input range is not fully covered by
* alignment
*
* @param rangeInSeq1 range in sequence 1
* @return range in sequence2 or null if rangeInSeq1 is not fully covered by alignment
*/
public Range convertToSeq2Range(Range rangeInSeq1) {
int from = aabs(convertToSeq2Position(rangeInSeq1.getFrom()));
int to = aabs(convertToSeq2Position(rangeInSeq1.getTo()));
if (from == -1 || to == -1)
return null;
return new Range(from, to);
}
/**
* Return alignment score
*
* @return alignment score
*/
public float getScore() {
return score;
}
/**
* Having sequence2 creates alignment from sequence2 to sequence1
*
* @param sequence2 sequence2
* @return inverted alignment
*/
public Alignment<S> invert(S sequence2) {
return new Alignment<>(sequence2, getRelativeMutations().invert().move(sequence2Range.getFrom()),
sequence2Range, sequence1Range, score);
}
/**
* Returns number of matches divided by sum of number of matches and mismatches.
*
* @return number of matches divided by sum of number of matches and mismatches
*/
public float similarity() {
int match = 0, mismatch = 0;
AlignmentIteratorForward<S> iterator = forwardIterator();
while (iterator.advance()) {
final int mut = iterator.getCurrentMutation();
if (mut == NON_MUTATION)
++match;
else
++mismatch;
}
return 1.0f * match / (match + mismatch);
}
/**
* Returns alignment helper to simplify alignment output in conventional (BLAST) form.
*
* @return alignment helper
*/
public AlignmentHelper getAlignmentHelper() {
List<Boolean> matches = new ArrayList<>();
IntArrayList pos1 = new IntArrayList(sequence1.size() + mutations.size()),
pos2 = new IntArrayList(sequence1.size() + mutations.size());
StringBuilder sb1 = new StringBuilder(),
sb2 = new StringBuilder();
Alphabet<S> alphabet = mutations.getAlphabet();
AlignmentIteratorForward<S> iterator = forwardIterator();
while (iterator.advance()) {
final int mut = iterator.getCurrentMutation();
switch (getRawTypeCode(mut)) {
case RAW_MUTATION_TYPE_SUBSTITUTION:
pos1.add(iterator.getSeq1Position());
pos2.add(iterator.getSeq2Position());
sb1.append(sequence1.symbolAt(iterator.getSeq1Position()));
sb2.append(Mutation.getToSymbol(mut, alphabet));
matches.add(false);
break;
case RAW_MUTATION_TYPE_DELETION:
pos1.add(iterator.getSeq1Position());
pos2.add(-1 - iterator.getSeq2Position());
sb1.append(sequence1.symbolAt(iterator.getSeq1Position()));
sb2.append("-");
matches.add(false);
break;
case RAW_MUTATION_TYPE_INSERTION:
pos1.add(-1 - iterator.getSeq1Position());
pos2.add(iterator.getSeq2Position());
sb1.append("-");
sb2.append(Mutation.getToSymbol(mut, alphabet));
matches.add(false);
break;
default:
pos1.add(iterator.getSeq1Position());
pos2.add(iterator.getSeq2Position());
char c = sequence1.symbolAt(iterator.getSeq1Position());
sb1.append(c);
sb2.append(c);
matches.add(true);
break;
}
}
return new AlignmentHelper(sb1.toString(), sb2.toString(),
pos1.toArray(), pos2.toArray(),
new BitArray(matches));
}
@Override
public String toString() {
return getAlignmentHelper().toCompactString();
}
public String toCompactString() {
return "" + sequence1Range.getFrom() + "|" + sequence1Range.getTo() + "|" + sequence1.size() +
"|" + sequence2Range.getFrom() + "|" + sequence2Range.getTo() + "|" + mutations.encode() + "|" + score;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Alignment alignment = (Alignment) o;
if (Float.compare(alignment.score, score) != 0) return false;
if (!mutations.equals(alignment.mutations)) return false;
if (!sequence1.equals(alignment.sequence1)) return false;
if (!sequence1Range.equals(alignment.sequence1Range)) return false;
if (!sequence2Range.equals(alignment.sequence2Range)) return false;
return true;
}
@Override
public int hashCode() {
int result = sequence1.hashCode();
result = 31 * result + mutations.hashCode();
result = 31 * result + sequence1Range.hashCode();
result = 31 * result + sequence2Range.hashCode();
result = 31 * result + (score != +0.0f ? Float.floatToIntBits(score) : 0);
return result;
}
public static int aabs(int position) {
if (position == -1)
return -1;
if (position < 0)
return -2 - position;
return position;
}
}