/* * Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br) * Copyright (C) 2008,2009 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com) * * For further information check the LICENSE file. */ package bio.pih.genoogle.tests.seq.generator; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import junit.framework.TestCase; import org.junit.AfterClass; import org.junit.Test; import bio.pih.genoogle.seq.Alphabet; import bio.pih.genoogle.seq.DNAAlphabet; import bio.pih.genoogle.seq.IllegalSymbolException; import bio.pih.genoogle.seq.Sequence; import bio.pih.genoogle.seq.generator.DNASequencesPopulator; import bio.pih.genoogle.seq.generator.RandomSequenceGenerator; /** * @author albrecht * */ public class SequencePopulatorTest extends TestCase { private static final String sequencePopulationTestFile = "data" + File.separator + "populator" + File.separator + "sequencePopulatorTest.seqs"; @Override @AfterClass public void tearDown() { try { removeIfExistFile(sequencePopulationTestFile); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } private static void removeIfExistFile(String filePath) throws IOException { File serializableSequencePopulationFile = new File(filePath); if (serializableSequencePopulationFile.exists()) { if (serializableSequencePopulationFile.delete() == false) { throw new IOException("Was not possible to delete " + serializableSequencePopulationFile); } } } /** * Test if the length and {@link Alphabet} of sequence generated are correct. */ public void testSequenceGenerator() throws IllegalSymbolException { int length = 1; RandomSequenceGenerator randomSequenceGenerator = new RandomSequenceGenerator(DNAAlphabet.SINGLETON, length); Sequence sequence = randomSequenceGenerator.generateSequence(); assertEquals(1, sequence.getLength()); assertEquals(DNAAlphabet.SINGLETON, sequence.getAlphabet()); length = 10; randomSequenceGenerator = new RandomSequenceGenerator(DNAAlphabet.SINGLETON, length); sequence = randomSequenceGenerator.generateSequence(); assertEquals(length, sequence.getLength()); assertEquals(DNAAlphabet.SINGLETON, sequence.getAlphabet()); length = 100; randomSequenceGenerator = new RandomSequenceGenerator(DNAAlphabet.SINGLETON, length); sequence = randomSequenceGenerator.generateSequence(); assertEquals(length, sequence.getLength()); assertEquals(DNAAlphabet.SINGLETON, sequence.getAlphabet()); length = 250; randomSequenceGenerator = new RandomSequenceGenerator(DNAAlphabet.SINGLETON, length); sequence = randomSequenceGenerator.generateSequence(); assertEquals(length, sequence.getLength()); assertEquals(DNAAlphabet.SINGLETON, sequence.getAlphabet()); length = 1000; randomSequenceGenerator = new RandomSequenceGenerator(DNAAlphabet.SINGLETON, length); sequence = randomSequenceGenerator.generateSequence(); assertEquals(length, sequence.getLength()); assertEquals(DNAAlphabet.SINGLETON, sequence.getAlphabet()); length = 10000; randomSequenceGenerator = new RandomSequenceGenerator(DNAAlphabet.SINGLETON, length); sequence = randomSequenceGenerator.generateSequence(); assertEquals(length, sequence.getLength()); assertEquals(DNAAlphabet.SINGLETON, sequence.getAlphabet()); } /** * Test if the length of the generated sequences are correct */ public void testDNASequencesPopulator() throws IllegalSymbolException { int from = 0; int to = 3; List<Sequence> sequences = DNASequencesPopulator.populateSequences(100, from, to); for(Sequence sequence: sequences) { assertTrue(sequence.getLength() >= from); assertTrue(sequence.getLength() <= to); } from = 0; to = 100; sequences = DNASequencesPopulator.populateSequences(100, from, to); for(Sequence sequence: sequences) { assertTrue(sequence.getLength() >= from); assertTrue(sequence.getLength() <= to); } from = 99; to = 100; sequences = DNASequencesPopulator.populateSequences(100, from, to); for(Sequence sequence: sequences) { assertTrue(sequence.getLength() >= from); assertTrue(sequence.getLength() <= to); } from = 0; to = 1000; sequences = DNASequencesPopulator.populateSequences(100, from, to); for(Sequence sequence: sequences) { assertTrue(sequence.getLength() >= from); assertTrue(sequence.getLength() <= to); } from = 999; to = 1000; sequences = DNASequencesPopulator.populateSequences(100, from, to); for(Sequence sequence: sequences) { assertTrue(sequence.getLength() >= from); assertTrue(sequence.getLength() <= to); } from = 1; to = 2; sequences = DNASequencesPopulator.populateSequences(100, from, to); for(Sequence sequence: sequences) { assertTrue(sequence.getLength() >= from); assertTrue(sequence.getLength() <= to); } } /** * Test if the save and load sequence population from a file is working */ public void testCreateSaveAndLoadSequencePopulation() throws IllegalSymbolException, FileNotFoundException, IOException, ClassNotFoundException { List<Sequence> sequences = new LinkedList<Sequence>(); String stringSequence = "CATGACTGGCATCAGTGCATGCATGCAGTCAGTATATATGACGC"; Sequence ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 1"); sequences.add(ss); stringSequence = "ACATGCTCGATGTGTGTGTATCAGTACTGACCTAGCATGACTCAGTACACATGACGTCATCATGTAGCGTCTAGACTGACTACGTACGACTGCATACGACTATCAGACTGACTACGCATGACGTACGTGTACGTACTGATGACGTACTATCGTAGCATGACTACGTACGACTGAC"; ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 1"); sequences.add(ss); stringSequence = "ATGCTAGCATTCAGTACGTACGCATGATGCTAGATCGCATGACTAGCACGTACTGCATCGTGTGTGTCATGTGACTGAC"; ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 2"); sequences.add(ss); stringSequence = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 3"); sequences.add(ss); stringSequence = "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"; ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 4"); sequences.add(ss); stringSequence = "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"; ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 5"); sequences.add(ss); stringSequence = "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"; ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 6"); sequences.add(ss); stringSequence = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCA"; ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 7"); sequences.add(ss); stringSequence = "ATCTGAGTCATGCGATCAGTGTTGGTCATGTCAGGTCAGTACTACGTAGCATGCATGCATACGATCGACTATATTGCATGAC"; ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 8"); sequences.add(ss); stringSequence = "AAAAAAACAAAAAAAGAAAAAAATTTTTTTGCATCAGATTTTTTTTCAGTACTGCATGACTACTGTGAC"; ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 9"); sequences.add(ss); stringSequence = "TGCAGTACGTACGTGTTGAGTGCTATGCATGTTTAGGCGCGGCGCTAGCATGCATCAGACGCATACGTGTACGTACGTACTGATTCAGACTGAC"; ss = new Sequence(DNAAlphabet.SINGLETON, stringSequence, "Sequence 10"); sequences.add(ss); removeIfExistFile(sequencePopulationTestFile); DNASequencesPopulator.writePopulation(sequences, sequencePopulationTestFile); List<Sequence> readSequences = DNASequencesPopulator.readPopulation(sequencePopulationTestFile); Iterator<Sequence> iterator = readSequences.iterator(); assertEquals(iterator.next().seqString().toUpperCase(), "CATGACTGGCATCAGTGCATGCATGCAGTCAGTATATATGACGC"); assertEquals(iterator.next().seqString().toUpperCase(), "ACATGCTCGATGTGTGTGTATCAGTACTGACCTAGCATGACTCAGTACACATGACGTCATCATGTAGCGTCTAGACTGACTACGTACGACTGCATACGACTATCAGACTGACTACGCATGACGTACGTGTACGTACTGATGACGTACTATCGTAGCATGACTACGTACGACTGAC"); assertEquals(iterator.next().seqString().toUpperCase(), "ATGCTAGCATTCAGTACGTACGCATGATGCTAGATCGCATGACTAGCACGTACTGCATCGTGTGTGTCATGTGACTGAC"); assertEquals(iterator.next().seqString().toUpperCase(), "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); assertEquals(iterator.next().seqString().toUpperCase(), "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"); assertEquals(iterator.next().seqString().toUpperCase(), "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"); assertEquals(iterator.next().seqString().toUpperCase(), "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"); assertEquals(iterator.next().seqString().toUpperCase(), "ACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCA"); assertEquals(iterator.next().seqString().toUpperCase(), "ATCTGAGTCATGCGATCAGTGTTGGTCATGTCAGGTCAGTACTACGTAGCATGCATGCATACGATCGACTATATTGCATGAC"); assertEquals(iterator.next().seqString().toUpperCase(), "AAAAAAACAAAAAAAGAAAAAAATTTTTTTGCATCAGATTTTTTTTCAGTACTGCATGACTACTGTGAC"); assertEquals(iterator.next().seqString().toUpperCase(), "TGCAGTACGTACGTGTTGAGTGCTATGCATGTTTAGGCGCGGCGCTAGCATGCATCAGACGCATACGTGTACGTACGTACTGATTCAGACTGAC"); assertFalse(iterator.hasNext()); removeIfExistFile(sequencePopulationTestFile); } /** * What is enough? May be 1k sequences is enough ? May be one million is huge, but 1k is enough for this test, I think... */ @Test public void testCreateSaveAndLoadRandomSequencePopulation() throws FileNotFoundException, IOException, ClassNotFoundException, IllegalSymbolException { int stepSize = 10; int maxSequences = 1000; int lengthFrom = 20; int lengthTo = 700; List<List<Sequence>> populations = new LinkedList<List<Sequence>>(); for (int sequenceQuantity = stepSize; sequenceQuantity <= maxSequences; sequenceQuantity *= stepSize) { System.out.println("Creating population: " + sequenceQuantity + " sequences"); String populationPath = sequencePopulationTestFile + "_" + sequenceQuantity; List<Sequence> populateSequences = DNASequencesPopulator.populateSequences(sequenceQuantity, lengthFrom, lengthTo); populations.add(populateSequences); removeIfExistFile(populationPath); DNASequencesPopulator.writePopulation(populateSequences, populationPath); } int listCreatedPos = 0; for (int sequenceQuantity = stepSize; sequenceQuantity <= maxSequences; sequenceQuantity *= stepSize) { String populationPath = sequencePopulationTestFile + "_" + sequenceQuantity; System.out.println("Testing population: " + sequenceQuantity + " sequences"); Iterator<Sequence> storedPopulationIterator = DNASequencesPopulator.readPopulation(populationPath).iterator(); Iterator<Sequence> createdPopulationIterator = populations.get(listCreatedPos).iterator(); while (storedPopulationIterator.hasNext() && createdPopulationIterator.hasNext()) { Sequence nextStored = storedPopulationIterator.next(); Sequence nextCreated = createdPopulationIterator.next(); checkSequenceEquals(nextStored, nextCreated); } assertFalse(storedPopulationIterator.hasNext()); assertFalse(createdPopulationIterator.hasNext()); removeIfExistFile(populationPath); listCreatedPos++; } } private static void checkSequenceEquals(Sequence seq1, Sequence seq2) { assertEquals(seq1.getAlphabet().getClass(), seq2.getAlphabet().getClass()); assertEquals(seq1.getName(), seq2.getName()); assertEquals(seq1.getLength(), seq2.getLength()); assertEquals(seq1.seqString(), seq2.seqString()); } }