/*
* Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br)
* Copyright (C) 2008,2009, 2010, 2011, 2012 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com)
*
* For further information check the LICENSE file.
*/
package bio.pih.genoogle.encoder;
import bio.pih.genoogle.index.ValueOutOfBoundsException;
import bio.pih.genoogle.seq.AminoAcidAlphabet;
import bio.pih.genoogle.seq.IllegalSymbolException;
import bio.pih.genoogle.seq.LightweightSymbolList;
import bio.pih.genoogle.seq.SymbolList;
/**
* Sequence encoder for the amino acid alphabet: converts each symbol to its integer (bit) code and
* back again.
*
* @author albrecht
*/
public class AminoAcidsSequenceEncoder extends SequenceEncoder {

    /**
     * Creates an encoder bound to {@link AminoAcidAlphabet#SINGLETON}.
     *
     * @param subSequenceLength sub-sequence length, forwarded unchanged to the superclass
     * @throws ValueOutOfBoundsException declared by this constructor; nothing in this class
     *         throws it directly, so it presumably originates in the superclass constructor
     */
    protected AminoAcidsSequenceEncoder(int subSequenceLength) throws ValueOutOfBoundsException {
        super(AminoAcidAlphabet.SINGLETON, subSequenceLength);
    }

    // TODO: read it from alphabet.
    // Decoding table: the symbol stored at index i is the one that getBitsFromChar maps to i.
    char[] letters = {
        'G', 'A', 'V', 'L', 'I', 'S', 'T', 'D', 'E', 'N', 'Q',
        'K', 'R', 'H', 'F', 'C', 'W', 'Y', 'M', 'P', '$', '#'
    };

    /**
     * Translates a symbol into its integer code (0 to 21).
     *
     * @param symbol one of the upper-case letters in {@code letters}, or {@code '$'} / {@code '#'}
     * @return the code assigned to {@code symbol}
     * @throws RuntimeException when {@code symbol} is not one of the 22 known symbols
     */
    public final int getBitsFromChar(char symbol) {
        switch (symbol) {
            case 'G': return 0;
            case 'A': return 1;
            case 'V': return 2;
            case 'L': return 3;
            case 'I': return 4;
            case 'S': return 5;
            case 'T': return 6;
            case 'D': return 7;
            case 'E': return 8;
            case 'N': return 9;
            case 'Q': return 10;
            case 'K': return 11;
            case 'R': return 12;
            case 'H': return 13;
            case 'F': return 14;
            case 'C': return 15;
            case 'W': return 16;
            case 'Y': return 17;
            case 'M': return 18;
            case 'P': return 19;
            case '$': return 20;
            case '#': return 21;
            default:
                throw new RuntimeException("Invalid symbol " + symbol);
        }
    }

    /**
     * Translates an integer code back into its symbol by indexing {@code letters}.
     *
     * @param bits code to decode; it is used directly as an array index, so a value outside
     *        the table fails with the array's own index-out-of-bounds exception
     * @return the symbol stored at position {@code bits}
     */
    public final char getSymbolFromBits(int bits) {
        return letters[bits];
    }

    /**
     * Manual smoke check: prints three encoder settings, then encodes a string holding every
     * known symbol, decodes it again, and prints the decoded text and whether it equals the
     * input.
     *
     * @param args ignored
     * @throws IllegalSymbolException declared because of the symbol-list calls made here
     */
    public static void main(String[] args) throws IllegalSymbolException {
        // Typed as SequenceEncoder because the printed fields are accessed through that type.
        SequenceEncoder encoder = new AminoAcidsSequenceEncoder(3);
        System.out.println(encoder.bitsByAlphabetSize);
        System.out.println(encoder.bitsMask);
        System.out.println(encoder.subSequenceLength);

        SymbolList protein = LightweightSymbolList.createProtein("GAVLISTDENQKRHFCWYMP$#");
        int[] encoded = encoder.encodeSymbolListToIntegerArray(protein);
        String decoded = encoder.decodeIntegerArrayToString(encoded);

        System.out.println(decoded);
        System.out.println(protein.seqString().equals(decoded));
    }
}