/*
* Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br)
* Copyright (C) 2008,2009 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com)
*
* For further information check the LICENSE file.
*/
package bio.pih.genoogle.tests.io.reader;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.NoSuchElementException;
import junit.framework.TestCase;
import org.junit.Test;
import bio.pih.genoogle.io.reader.IOTools;
import bio.pih.genoogle.io.reader.ParseException;
import bio.pih.genoogle.io.reader.RichSequenceStreamReader;
import bio.pih.genoogle.seq.DNAAlphabet;
import bio.pih.genoogle.seq.IllegalSymbolException;
import bio.pih.genoogle.seq.RichSequence;
/**
 * Tests for reading FASTA text through {@link IOTools#readFasta} with the DNA alphabet.
 *
 * <p>Each test feeds an in-memory FASTA fixture to the reader and checks how the header line is
 * exposed through the {@link RichSequence} accessors (type, gi, name, accession, description)
 * together with the residues returned by {@code seqString()}.
 */
public class RichSequenceFastaFileReaderTest extends TestCase {

    /**
     * Wraps the given FASTA text in a buffered reader and opens a DNA sequence stream over it.
     *
     * @param fasta FASTA formatted text held in memory
     * @return the stream reader produced by {@link IOTools#readFasta}
     */
    private static RichSequenceStreamReader openFasta(String fasta) throws NoSuchElementException, IOException,
            ParseException, IllegalSymbolException {
        BufferedReader buffered = new BufferedReader(new StringReader(fasta));
        return IOTools.readFasta(buffered, DNAAlphabet.SINGLETON);
    }

    /** Two records with {@code gi|...} headers, each with its residues split over two lines. */
    String giSequences = ">gi|6626248|gb|AE000657.1| Aquifex aeolicus VF5, complete genome\n"
            + "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT\n"
            + "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT\n"
            + ">gi|114053012|ref|NM_001046239.1| Bos taurus CD36 antigen like (MGC137452), mRNA\n"
            + "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT\n"
            + "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT\n";

    /**
     * Expects both {@code gi} records to expose type, gi, name, accession and description taken
     * from the header fields, and the two residue lines joined into a single string.
     */
    @Test
    public void testGiFastaFormatReader() throws NoSuchElementException, IOException, ParseException,
            IllegalSymbolException {
        // Both records carry the same two residue lines, so they share one expected string.
        String expectedResidues =
                "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGTTGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT";
        RichSequenceStreamReader stream = openFasta(giSequences);

        RichSequence first = stream.nextRichSequence();
        assertEquals("gi", first.getType());
        assertEquals("6626248", first.getGi());
        assertEquals("gb", first.getName());
        assertEquals("AE000657.1", first.getAccession());
        assertEquals(" Aquifex aeolicus VF5, complete genome", first.getDescription());
        assertEquals(expectedResidues, first.seqString());

        RichSequence second = stream.nextRichSequence();
        assertEquals("gi", second.getType());
        assertEquals("114053012", second.getGi());
        assertEquals("ref", second.getName());
        assertEquals("NM_001046239.1", second.getAccession());
        assertEquals(" Bos taurus CD36 antigen like (MGC137452), mRNA", second.getDescription());
        assertEquals(expectedResidues, second.seqString());
    }

    /** Two records with {@code lcl|...} headers: one bare name, one name followed by extra text. */
    String lclSequences = ">lcl|Sequence_X\n"
            + "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT\n"
            + ">lcl|RandomSequence_494.0|RandomSequence_494 bla bla bla\n"
            + "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT\n";

    /**
     * Expects {@code lcl} records to expose the second header field as the name and whatever
     * follows it (possibly nothing) as the description.
     */
    @Test
    public void testLclFastaFormatReader() throws NoSuchElementException, IOException, ParseException,
            IllegalSymbolException {
        String expectedResidues = "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT";
        RichSequenceStreamReader stream = openFasta(lclSequences);

        RichSequence first = stream.nextRichSequence();
        assertEquals("lcl", first.getType());
        assertEquals("Sequence_X", first.getName());
        assertEquals("", first.getDescription());
        assertEquals(expectedResidues, first.seqString());

        RichSequence second = stream.nextRichSequence();
        assertEquals("lcl", second.getType());
        assertEquals("RandomSequence_494.0", second.getName());
        assertEquals("RandomSequence_494 bla bla bla", second.getDescription());
        assertEquals(expectedResidues, second.seqString());
    }

    /** Two records whose header type ({@code unknow}) is not one of the types used by the other fixtures. */
    String unknowSequences = ">unknow|Sequence_X\n"
            + "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT\n"
            + ">unknow|Blah|Blum|Zum| bla bla bla\n"
            + "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT\n";

    /**
     * Expects a two-field header to yield only type and description (other accessors empty), and
     * a five-field header to populate type, name, gi, accession and description in that order.
     */
    @Test
    public void testUnknowFastaFormatReader() throws NoSuchElementException, IOException, ParseException,
            IllegalSymbolException {
        String expectedResidues = "TGCAACGATGGACTGGATGCCCCAGGAAAAGGAAAGAGGTATAACCATAACCGTTGCAACGACCGCATGT";
        RichSequenceStreamReader stream = openFasta(unknowSequences);

        RichSequence first = stream.nextRichSequence();
        assertEquals("unknow", first.getType());
        assertEquals("", first.getName());
        assertEquals("Sequence_X", first.getDescription());
        assertEquals("", first.getGi());
        assertEquals("", first.getAccession());
        assertEquals(expectedResidues, first.seqString());

        RichSequence second = stream.nextRichSequence();
        assertEquals("unknow", second.getType());
        assertEquals("Blah", second.getName());
        assertEquals("Blum", second.getGi());
        assertEquals("Zum", second.getAccession());
        assertEquals(" bla bla bla", second.getDescription());
        assertEquals(expectedResidues, second.seqString());
    }

    /** A single {@code gb|...} record whose description contains slashes, brackets and parentheses. */
    String influenzaSequence = ">gb|FJ966082:1-1701| /Human/HA/H1N1/USA/2009/04/01/hemagglutinin[Influenza A virus (A/California/04/2009(H1N1))]\n"
            + "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTT\n"
            + "ATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGT\n";

    /**
     * Expects the {@code gb} record to expose type, name and the full description, with empty gi
     * and accession, and the two residue lines joined.
     */
    @Test
    public void testInfluenzaSequence() throws NoSuchElementException, IOException, ParseException,
            IllegalSymbolException {
        RichSequenceStreamReader stream = openFasta(influenzaSequence);
        RichSequence record = stream.nextRichSequence();

        assertEquals("gb", record.getType());
        assertEquals("FJ966082:1-1701", record.getName());
        assertEquals(" /Human/HA/H1N1/USA/2009/04/01/hemagglutinin[Influenza A virus (A/California/04/2009(H1N1))]",
                record.getDescription());
        assertEquals("", record.getGi());
        assertEquals("", record.getAccession());
        assertEquals(
                "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGT",
                record.seqString());
    }

    /** A single record whose header starts directly with an identifier; the text has no trailing newline. */
    String fiocruzHeader = ">NP_059666|NP_059666 putative cytochrome oxidase III [Plasmodium falciparum])\n"
            + "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGT";

    /**
     * Expects the text before the first {@code |} as the type, the remainder as the description,
     * and empty name, gi and accession.
     */
    @Test
    public void testFiocruzSequence() throws NoSuchElementException, IOException, ParseException,
            IllegalSymbolException {
        RichSequenceStreamReader stream = openFasta(fiocruzHeader);
        RichSequence record = stream.nextRichSequence();

        assertEquals("NP_059666", record.getType());
        assertEquals("", record.getName());
        assertEquals("NP_059666 putative cytochrome oxidase III [Plasmodium falciparum])", record.getDescription());
        assertEquals("", record.getGi());
        assertEquals("", record.getAccession());
        assertEquals(
                "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGT",
                record.seqString());
    }

    /** A single {@code EMBLCDS:} record whose header fields are separated by a colon and spaces. */
    String emblCdsHeader = ">EMBLCDS:BAJ49870 BAJ49870.1 Candidatus Caldiarchaeum subterraneum archaeal cell division control protein 6\n" +
            "ATGTCGGCTGCTTTGGATGAGTCTACACAGCTCTCCGAAAAACCGCCGAAGCGTTAGGAGTAGAGATTCCGAGA";

    /**
     * Expects type {@code EMBLCDS}, the first identifier as gi, the second as name, the rest of
     * the line as description and an empty accession.
     */
    @Test
    public void testEmblCdsSequence() throws NoSuchElementException, IOException, ParseException,
            IllegalSymbolException {
        RichSequenceStreamReader stream = openFasta(emblCdsHeader);
        RichSequence record = stream.nextRichSequence();

        assertEquals("EMBLCDS", record.getType());
        assertEquals("BAJ49870.1", record.getName());
        assertEquals("Candidatus Caldiarchaeum subterraneum archaeal cell division control protein 6",
                record.getDescription());
        assertEquals("BAJ49870", record.getGi());
        assertEquals("", record.getAccession());
        assertEquals(
                "ATGTCGGCTGCTTTGGATGAGTCTACACAGCTCTCCGAAAAACCGCCGAAGCGTTAGGAGTAGAGATTCCGAGA",
                record.seqString());
    }

    /** An {@code EMBLCDS:} record whose description itself embeds a second {@code #EMBLCDS:} entry. */
    String emblCdsHeader2 = ">EMBLCDS:EBW45059 EBW45059.1 marine metagenome hypothetical protein#EMBLCDS:EDG62044 EDG62044.1 marine metagenome hypothetical protein\n" +
            "ATGTCGGCTGCTTTGGATGAGTCTACACAGCTCTCCGAAAAACCGCCGAAGCGTTAGGAGTAGAGATTCCGAGA";

    /**
     * Expects only the leading entry to populate type, gi and name; the embedded
     * {@code #EMBLCDS:} entry is expected to stay inside the description untouched.
     */
    @Test
    public void testEmblCdsSequence2() throws NoSuchElementException, IOException, ParseException,
            IllegalSymbolException {
        RichSequenceStreamReader stream = openFasta(emblCdsHeader2);
        RichSequence record = stream.nextRichSequence();

        assertEquals("EMBLCDS", record.getType());
        assertEquals("EBW45059.1", record.getName());
        assertEquals("marine metagenome hypothetical protein#EMBLCDS:EDG62044 EDG62044.1 marine metagenome hypothetical protein",
                record.getDescription());
        assertEquals("EBW45059", record.getGi());
        assertEquals("", record.getAccession());
        assertEquals(
                "ATGTCGGCTGCTTTGGATGAGTCTACACAGCTCTCCGAAAAACCGCCGAAGCGTTAGGAGTAGAGATTCCGAGA",
                record.seqString());
    }

    /** A single record with a {@code contigNNNNN_N length=...} style header. */
    String contigSequenceHeader = ">contig00001_1 length=19730\n" +
            "ATGTCGGCTGCTTTGGATGAGTCTACACAGCTCTCCGAAAAACCGCCGAAGCGTTAGGAGTAGAGATTCCGAGA";

    /**
     * Expects type {@code contig}, the remainder of the first token as name, the text after the
     * space as description, and {@code getHeader()} to return the header line without the
     * leading {@code >}.
     */
    @Test
    public void testContigSequence() throws NoSuchElementException, IOException, ParseException,
            IllegalSymbolException {
        RichSequenceStreamReader stream = openFasta(contigSequenceHeader);
        RichSequence record = stream.nextRichSequence();

        assertEquals("contig", record.getType());
        assertEquals("00001_1", record.getName());
        assertEquals("length=19730", record.getDescription());
        assertEquals("contig00001_1 length=19730", record.getHeader());
        assertEquals(
                "ATGTCGGCTGCTTTGGATGAGTCTACACAGCTCTCCGAAAAACCGCCGAAGCGTTAGGAGTAGAGATTCCGAGA",
                record.seqString());
    }
}