/*
* Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br)
* Copyright (C) 2008,2009 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com)
*
* For further information check the LICENSE file.
*/
package bio.pih.genoogle.tests.encoder;
import junit.framework.TestCase;
import org.junit.Test;
import bio.pih.genoogle.encoder.SequenceEncoder;
import bio.pih.genoogle.encoder.SequenceEncoderFactory;
import bio.pih.genoogle.index.ValueOutOfBoundsException;
import bio.pih.genoogle.seq.DNAAlphabet;
import bio.pih.genoogle.seq.IllegalSymbolException;
import bio.pih.genoogle.seq.LightweightSymbolList;
import bio.pih.genoogle.seq.RNAAlphabet;
import bio.pih.genoogle.seq.SymbolList;
/**
* Tests encoding and decoding of DNA and RNA sequences with the {@link SequenceEncoder}
* instances obtained from {@link SequenceEncoderFactory}.
*
* @author albrecht
*/
public class SequenceEncoderToIntegerTest extends TestCase {

    /**
     * Second argument handed to {@link SequenceEncoderFactory#getEncoder}. Every
     * expectation in this class assumes eight symbols are packed into one integer.
     */
    private static final int SUB_SEQUENCE_LENGTH = 8;

    /**
     * DNA sub-sequences paired with the bit pattern the tests expect for them.
     * The expected patterns use two bits per symbol: A=00, C=01, G=10, T=11.
     */
    private static final String[][] DNA_SUB_SEQUENCES = {
        { "TCGGACTG", "1101101000011110" },
        { "AACAACAA", "0000010000010000" },
        { "CCCCCCCC", "0101010101010101" },
        { "TTTTTTTT", "1111111111111111" },
        { "ACTGGTCA", "0001111010110100" },
        { "ATTTTTTT", "0011111111111111" },
        { "TCTAGCCA", "1101110010010100" }
    };

    /**
     * RNA counterparts of {@link #DNA_SUB_SEQUENCES}: same expected bit patterns,
     * with U in place of T (A=00, C=01, G=10, U=11).
     */
    private static final String[][] RNA_SUB_SEQUENCES = {
        { "UCGGACUG", "1101101000011110" },
        { "AACAACAA", "0000010000010000" },
        { "CCCCCCCC", "0101010101010101" },
        { "UUUUUUUU", "1111111111111111" },
        { "ACUGGUCA", "0001111010110100" },
        { "AUUUUUUU", "0011111111111111" },
        { "UCUAGCCA", "1101110010010100" }
    };

    /**
     * Checks that each DNA fixture sub-sequence is encoded to its expected integer.
     */
    @Test
    public void testEncodeDNASubSymbolList() throws ValueOutOfBoundsException, IllegalSymbolException {
        SequenceEncoder encoder = SequenceEncoderFactory.getEncoder(DNAAlphabet.SINGLETON, SUB_SEQUENCE_LENGTH);
        for (String[] fixture : DNA_SUB_SEQUENCES) {
            SymbolList symbols = LightweightSymbolList.createDNA(fixture[0]);
            // The sequence doubles as the failure message so a failing fixture is identifiable.
            assertEquals(fixture[0], fromBinary(fixture[1]), encoder.encodeSubSequenceToInteger(symbols));
        }
    }

    /**
     * Checks that each expected integer is decoded back to its DNA fixture string.
     */
    @Test
    public void testDecodeToStringSDNASubSequence() throws ValueOutOfBoundsException {
        SequenceEncoder encoder = SequenceEncoderFactory.getEncoder(DNAAlphabet.SINGLETON, SUB_SEQUENCE_LENGTH);
        for (String[] fixture : DNA_SUB_SEQUENCES) {
            assertEquals(fixture[0], encoder.decodeIntegerToString(fromBinary(fixture[1])));
        }
    }

    /**
     * Checks that encoding a DNA sub-sequence and decoding the result yields the
     * original string.
     */
    @Test
    public void testEncodedAndDecodeToIntegerDNASubSequence() throws ValueOutOfBoundsException, IllegalSymbolException {
        SequenceEncoder encoder = SequenceEncoderFactory.getEncoder(DNAAlphabet.SINGLETON, SUB_SEQUENCE_LENGTH);
        for (String[] fixture : DNA_SUB_SEQUENCES) {
            assertSubSequenceRoundTrips(encoder, LightweightSymbolList.createDNA(fixture[0]));
        }
        assertSubSequenceRoundTrips(encoder, LightweightSymbolList.createDNA("TCTAGCAA")); // 1101110010010000
    }

    /**
     * Checks the integer array produced for whole DNA sequences: every encoded
     * word, the stored length, and the decoding back to the original string.
     */
    @Test
    public void testDecodeToIntegerDNASequence() throws ValueOutOfBoundsException, IllegalSymbolException {
        SequenceEncoder encoder = SequenceEncoderFactory.getEncoder(DNAAlphabet.SINGLETON, SUB_SEQUENCE_LENGTH);

        // Seven complete words.
        assertSequenceEncodesTo(encoder,
                LightweightSymbolList.createDNA("TCTAGCCAATTTTTTTACTGGTCATTTTTTTTCCCCCCCCAACAACAATCGGACTG"),
                "1101110010010100",
                "0011111111111111",
                "0001111010110100",
                "1111111111111111",
                "0101010101010101",
                "0000010000010000",
                "1101101000011110");

        // Shorter than one word: the expected value has zero bits after the last symbol.
        assertSequenceEncodesTo(encoder,
                LightweightSymbolList.createDNA("TCTAGC"),
                "1101110010010000");

        // One complete word followed by a partial one.
        assertSequenceEncodesTo(encoder,
                LightweightSymbolList.createDNA("TTTTACTGGTC"),
                "1111111100011110",
                "1011010000000000");

        // Five complete words followed by a seven-symbol one.
        assertSequenceEncodesTo(encoder,
                LightweightSymbolList.createDNA("AAACACTA" + // 0000000100011100
                        "GCTACGTC" + // 1001110001101101
                        "GAATAGCA" + // 1000001100100100
                        "ACTGAGAT" + // 0001111000100011
                        "GCATGAGC" + // 1001001110001001
                        "ACAACTG"), // 0001000001111000
                "0000000100011100",
                "1001110001101101",
                "1000001100100100",
                "0001111000100011",
                "1001001110001001",
                "0001000001111000");
    }

    /**
     * Checks that each RNA fixture sub-sequence is encoded to its expected integer.
     */
    @Test
    public void testEncodeRNASubSymbolList() throws ValueOutOfBoundsException, IllegalSymbolException {
        SequenceEncoder encoder = SequenceEncoderFactory.getEncoder(RNAAlphabet.SINGLETON, SUB_SEQUENCE_LENGTH);
        for (String[] fixture : RNA_SUB_SEQUENCES) {
            SymbolList symbols = LightweightSymbolList.createRNA(fixture[0]);
            // The sequence doubles as the failure message so a failing fixture is identifiable.
            assertEquals(fixture[0], fromBinary(fixture[1]), encoder.encodeSubSequenceToInteger(symbols));
        }
    }

    /**
     * Checks that each expected integer is decoded back to its RNA fixture string.
     */
    @Test
    public void testDecodeToStringRNASubSequence() throws ValueOutOfBoundsException {
        SequenceEncoder encoder = SequenceEncoderFactory.getEncoder(RNAAlphabet.SINGLETON, SUB_SEQUENCE_LENGTH);
        for (String[] fixture : RNA_SUB_SEQUENCES) {
            assertEquals(fixture[0], encoder.decodeIntegerToString(fromBinary(fixture[1])));
        }
    }

    /**
     * Checks that encoding an RNA sub-sequence and decoding the result yields the
     * original string.
     */
    @Test
    public void testEncodedAndDecodeToIntegerRNASubSequence() throws ValueOutOfBoundsException, IllegalSymbolException {
        SequenceEncoder encoder = SequenceEncoderFactory.getEncoder(RNAAlphabet.SINGLETON, SUB_SEQUENCE_LENGTH);
        for (String[] fixture : RNA_SUB_SEQUENCES) {
            assertSubSequenceRoundTrips(encoder, LightweightSymbolList.createRNA(fixture[0]));
        }
        assertSubSequenceRoundTrips(encoder, LightweightSymbolList.createRNA("UCUAGCAA")); // 1101110010010000
    }

    /**
     * Checks the integer array produced for whole RNA sequences: every encoded
     * word, the stored length, and the decoding back to the original string.
     */
    @Test
    public void testDecodeToIntegerRNASequence() throws ValueOutOfBoundsException, IllegalSymbolException {
        SequenceEncoder encoder = SequenceEncoderFactory.getEncoder(RNAAlphabet.SINGLETON, SUB_SEQUENCE_LENGTH);

        // Seven complete words.
        assertSequenceEncodesTo(encoder,
                LightweightSymbolList.createRNA("UCUAGCCAAUUUUUUUACUGGUCAUUUUUUUUCCCCCCCCAACAACAAUCGGACUG"),
                "1101110010010100",
                "0011111111111111",
                "0001111010110100",
                "1111111111111111",
                "0101010101010101",
                "0000010000010000",
                "1101101000011110");

        // Shorter than one word: the expected value has zero bits after the last symbol.
        assertSequenceEncodesTo(encoder,
                LightweightSymbolList.createRNA("UCUAGC"),
                "1101110010010000");

        // One complete word followed by a partial one.
        assertSequenceEncodesTo(encoder,
                LightweightSymbolList.createRNA("UUUUACUGGUC"),
                "1111111100011110",
                "1011010000000000");

        // Five complete words followed by a seven-symbol one.
        assertSequenceEncodesTo(encoder,
                LightweightSymbolList.createRNA("AAACACUA" + // 0000000100011100
                        "GCUACGUC" + // 1001110001101101
                        "GAAUAGCA" + // 1000001100100100
                        "ACUGAGAU" + // 0001111000100011
                        "GCAUGAGC" + // 1001001110001001
                        "ACAACUG"), // 0001000001111000
                "0000000100011100",
                "1001110001101101",
                "1000001100100100",
                "0001111000100011",
                "1001001110001001",
                "0001000001111000");
    }

    /**
     * Parses a string of binary digits into the integer it denotes.
     *
     * @param binaryDigits digits '0' and '1', most significant first
     * @return the parsed value
     */
    private static int fromBinary(String binaryDigits) {
        return Integer.parseInt(binaryDigits, 2);
    }

    /**
     * Encodes a sub-sequence to an integer, decodes that integer, and asserts the
     * decoded text equals the sub-sequence's own string form.
     *
     * @param encoder encoder under test
     * @param symbols sub-sequence to send through the encoder and back
     */
    private void assertSubSequenceRoundTrips(SequenceEncoder encoder, SymbolList symbols)
            throws ValueOutOfBoundsException, IllegalSymbolException {
        int encoded = encoder.encodeSubSequenceToInteger(symbols);
        assertEquals(symbols.seqString(), encoder.decodeIntegerToString(encoded));
    }

    /**
     * Encodes a whole sequence into an integer array and asserts, in this order:
     * each expected word starting at {@code SequenceEncoder.getPositionBeginBitsVector()},
     * the sequence length stored at {@code SequenceEncoder.getPositionLength()},
     * and that decoding the array gives back the original string.
     *
     * @param encoder       encoder under test
     * @param symbols       sequence to encode
     * @param expectedWords expected encoded words as binary digit strings, in order
     */
    private void assertSequenceEncodesTo(SequenceEncoder encoder, SymbolList symbols, String... expectedWords)
            throws ValueOutOfBoundsException, IllegalSymbolException {
        int[] encoded = encoder.encodeSymbolListToIntegerArray(symbols);
        for (int word = 0; word < expectedWords.length; word++) {
            assertEquals("encoded word " + word,
                    fromBinary(expectedWords[word]),
                    encoded[SequenceEncoder.getPositionBeginBitsVector() + word]);
        }
        assertEquals(symbols.getLength(), encoded[SequenceEncoder.getPositionLength()]);
        assertEquals(symbols.seqString(), encoder.decodeIntegerArrayToString(encoded));
    }
}