/*
* Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br)
* Copyright (C) 2008,2009 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com)
*
* For further information check the LICENSE file.
*/
package bio.pih.genoogle.encoder;
import bio.pih.genoogle.seq.SymbolList;
import bio.pih.genoogle.util.SymbolListWindowIterator;
import bio.pih.genoogle.util.SymbolListWindowIteratorFactory;
/**
* Class that apply a mask to the given sub-sequences.
*
* @author albrecht
*/
public final class MaskEncoder {
private final boolean[] mask;
private final int patternLength;
private final int resultLength;
private final SequenceEncoder encoder;
/**
* @param mask Mask where "1" means that the base should be preserved and "0" that should be removed.
* @param subSequenceLength The subsequence length, the value should be the total of "1"s at the mask.
*/
public MaskEncoder(final String mask, final SequenceEncoder encoder) {
this.patternLength = mask.length();
this.mask = new boolean[patternLength];
this.encoder = encoder;
int length = 0;
for (int i = 0; i < this.patternLength; i++) {
if (mask.charAt(i) == '1') {
this.mask[i] = true;
length++;
}
}
if (length != encoder.getSubSequenceLength()) {
throw new RuntimeException("The subSequenceLength (" + encoder.getSubSequenceLength()
+ ") and the count of the usable values of the mask (" + length + ") should be the same.");
}
this.resultLength = length;
}
public int getPatternLength() {
return patternLength;
}
/**
* Apply the mask in a informed {@link SymbolList} and return the encoded masked sequence.
* @param symbolList sequence
* @return encoded masked {@link SymbolList}.
*/
public int applyMask(SymbolList symbolList) {
int encoded = 0;
int offset = 0;
int length = symbolList.getLength();
for (int i = 1; i <= length; i++) {
if (this.mask[i - 1]) {
encoded |= (encoder.getBitsFromChar(symbolList.symbolAt(i)) << ((resultLength - (i - offset)) << 1));
} else {
offset++;
}
}
return encoded;
}
/**
* Apply the mask in a informed {@link String} sub-sequence and return the encoded masked sequence.
* @param subSequence
* @return encoded masked sequence.
*/
public int applyMask(String subSequence) {
int encoded = 0;
int offset = 0;
int length = subSequence.length();
for (int i = 0; i < length; i++) {
if (this.mask[i]) {
encoded |= (encoder.getBitsFromChar(subSequence.charAt(i)) << ((resultLength - (i - offset + 1)) << 1));
} else {
offset++;
}
}
return encoded;
}
/**
* Apply the mask on a portion of the given sub-sequence
* @param begin of the sub-sequence
* @param end of the sub-sequence.
* @param subSequence where the mask will be applied.
* @return encoded version of the masked sub-sequence.
*/
public int applyMask(int begin, int end, String subSequence) {
int encoded = 0;
int offset = 0;
for (int i = begin; i < end; i++) {
int pos = i - begin;
if (this.mask[pos]) {
encoded |= (encoder.getBitsFromChar(subSequence.charAt(i)) << ((resultLength - (pos - offset + 1)) << 1));
} else {
offset++;
}
}
return encoded;
}
/**
* Apply mask in a whole {@link SymbolList} sequence.
* @param sequence where the mask will be applied.
* @return encoded version of the masked sequence.
*/
public int[] applySequenceMask(SymbolList sequence) {
assert (sequence.getAlphabet().equals(encoder.getAlphabet()));
int size = sequence.getLength() / this.patternLength;
size++; // extra space for information on the length.
int sequenceEncoded[] = new int[size];
sequenceEncoded[SequenceEncoder.getPositionLength()] = sequence.getLength();
int pos = SequenceEncoder.getPositionBeginBitsVector();
SymbolListWindowIterator symbolListWindowIterator = SymbolListWindowIteratorFactory.getNotOverlappedFactory().newSymbolListWindowIterator(
sequence, this.patternLength);
while (symbolListWindowIterator.hasNext()) {
SymbolList next = symbolListWindowIterator.next();
sequenceEncoded[pos] = applyMask(next);
pos++;
}
return sequenceEncoded;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("Pattern: ");
sb.append("\"");
for (int i = 0; i < patternLength; i++) {
if (this.mask[i]) {
sb.append('X');
} else {
sb.append(' ');
}
}
sb.append("\" ");
sb.append(patternLength);
sb.append(" ");
sb.append(resultLength);
return sb.toString();
}
}