/*
* Copyright 2015 MiLaboratory.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.milaboratory.core.sequence;
import com.milaboratory.core.alignment.batch.HasSequence;
import com.milaboratory.core.motif.Motif;
import com.milaboratory.primitivio.annotations.Serializable;
/**
* Parent class for all types of sequences. Each element of sequence (e.g. nucleotide, or amino acid)
* encoded in byte, so {@code Sequence} is a simple container of ordered bytes; the correspondence between byte codes
* and particular elements is defined in {@link com.milaboratory.core.sequence.Alphabet} that corresponds to this
* type of sequence (via {@link #getAlphabet()}).
*
* @param <S> type of sequence
* @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com)
* @author Shugay Mikhail (mikhail.shugay@gmail.com)
* @see com.milaboratory.core.sequence.Alphabet
* @see com.milaboratory.core.sequence.SequenceBuilder
* @see com.milaboratory.core.sequence.NucleotideSequence
* @see com.milaboratory.core.sequence.AminoAcidSequence
*/
@Serializable(by = IO.SequenceSerializer.class)
public abstract class Sequence<S extends Sequence<S>> extends AbstractSeq<S> implements Comparable<S>, HasSequence<S> {
/**
* Returns letter code at specified position.
*
* @param position position in sequence
* @return element at specified position
*/
public abstract byte codeAt(int position);
/**
* Returns the alphabet corresponding to this type of sequence.
*
* @return alphabet corresponding to this type of sequence
*/
public abstract Alphabet<S> getAlphabet();
/**
* Returns an array of bytes that encodes this sequence.
*
* @return array of bytes that encodes this sequence
*/
public byte[] asArray() {
byte[] bytes = new byte[size()];
for (int i = size() - 1; i >= 0; --i)
bytes[i] = codeAt(i);
return bytes;
}
/**
* Returns a character representation of element at specified position.
*
* @param position position in this sequence
* @return character representation of element at specified position
*/
public char symbolAt(int position) {
return getAlphabet().codeToSymbol(codeAt(position));
}
/**
* Converts sequnce to motif data structure efficient for exact and fuzzy wildcard-aware matching and searching of
* sequences.
*
* @return motif
*/
@SuppressWarnings("unchecked")
public Motif<S> toMotif() {
return new Motif(this);
}
public boolean containWildcards() {
for (int i = 0; i < size(); i++)
if (getAlphabet().isWildcard(codeAt(i)))
return true;
return false;
}
@Override
public S getSequence() {
return (S) this;
}
@Override
public SequenceBuilder<S> getBuilder() {
return getAlphabet().createBuilder();
}
@Override
public boolean equals(Object obj) {
if (obj == null)
return false;
if (!(obj instanceof Sequence))
return false;
final Sequence other = (Sequence) obj;
if (other.getAlphabet() != getAlphabet())
return false;
if (other.size() != this.size())
return false;
for (int i = size() - 1; i >= 0; --i)
if (other.codeAt(i) != codeAt(i))
return false;
return true;
}
@Override
public int hashCode() {
int hash = 7;
hash += 31 * getAlphabet().hashCode();
for (int i = size() - 1; i >= 0; --i)
hash = hash * 7 + codeAt(i);
return hash;
}
@Override
public String toString() {
char[] chars = new char[size()];
for (int i = 0; i < size(); i++)
chars[i] = getAlphabet().codeToSymbol(codeAt(i));
return new String(chars);
}
@Override
public int compareTo(S o) {
if (this.getAlphabet() != o.getAlphabet())
throw new RuntimeException();
if (this.size() != o.size())
if (this.size() < o.size())
return -1;
else
return 1;
byte b0, b1;
for (int i = 0; i < size(); i++) {
b0 = this.codeAt(i);
b1 = o.codeAt(i);
if (b0 != b1)
if (b0 < b1)
return -1;
else
return 1;
}
return 0;
}
/**
* Tests whether this sequence contains {@code subSequence} and returns position of the first matched letter in
* this sequence or -1 if it does not contain {@code subSequence}.
*
* @param subSequence subsequence
* @return position of the first matched letter in this sequence or -1 if it does not contain {@code subSequence}
*/
public int indexOf(S subSequence) {
if (subSequence.size() == 0)
return -1;
int limit = size() - subSequence.size();
next:
for (int i = 0; i <= limit; i++) {
for (int j = 0; j < subSequence.size(); j++)
if (subSequence.codeAt(j) != codeAt(i + j))
continue next;
return i;
}
return -1;
}
}