/** * BioJava development code * * This code may be freely distributed and modified under the terms of the GNU * Lesser General Public Licence. This should be distributed with the code. If * you do not have a copy, see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * * Created on Oct 5, 2011 * Created by Andreas Prlic * * @since 3.0.2 */ package org.biojava.nbio.alignment; import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper; import junit.framework.TestCase; import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType; import org.biojava.nbio.alignment.template.PairwiseSequenceAligner; import org.biojava.nbio.core.alignment.template.Profile; import org.biojava.nbio.core.alignment.template.SequencePair; import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; import org.biojava.nbio.core.exceptions.CompoundNotFoundException; import org.biojava.nbio.core.sequence.DNASequence; import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet; import org.biojava.nbio.core.sequence.compound.DNACompoundSet; import org.biojava.nbio.core.sequence.compound.NucleotideCompound; import org.biojava.nbio.core.sequence.io.FastaReaderHelper; import org.biojava.nbio.core.util.ConcurrencyTools; import java.io.InputStream; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; public class TestDNAAlignment extends TestCase { private static final double PRECISION = 0.00000001; public void testDNAAlignment() { try { List<DNASequence> lst = getDNAFASTAFile(); Profile<DNASequence, NucleotideCompound> profile = Alignments.getMultipleSequenceAlignment(lst); assertTrue(profile.getSize() == 10); assertTrue(profile.getAlignedSequence(1).getSequenceAsString().length() > 50); // here how to print the MSA: //System.out.printf("MSA:%n%s%n", profile); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } ConcurrencyTools.shutdown(); } private static List<DNASequence> getDNAFASTAFile() throws Exception { InputStream inStream = TestDNAAlignment.class.getResourceAsStream(String.format("/dna-fasta.txt")); LinkedHashMap<String, DNASequence> fastas = FastaReaderHelper.readFastaDNASequence(inStream); List<DNASequence> sequences = new ArrayList<DNASequence>(); for (String key : fastas.keySet()) { DNASequence seq = fastas.get(key); sequences.add(seq); } return sequences; } /** * @author brandstaetter */ public void testDNAMultipleAlignmentWithMixedCompoundSets() throws CompoundNotFoundException { DNASequence target = new DNASequence("ACTGACGTGTAGCTGACTGA", DNACompoundSet.getDNACompoundSet()); DNASequence query = new DNASequence("ACTGACGTGTAGCTGACTGTA", AmbiguityDNACompoundSet.getDNACompoundSet()); List<DNASequence> lst = new ArrayList<DNASequence>(); lst.add(target); lst.add(query); try { @SuppressWarnings("unused") Profile<DNASequence, NucleotideCompound> profile = Alignments.getMultipleSequenceAlignment(lst); fail("Alignments.getMultipleSequenceAlignment(lst) expected exception with differing compound sets"); } catch (IllegalArgumentException ex) { // expected exception } } /** * @author brandstaetter */ public void testDNAPairwiseAlignmentWithMixedCompoundSets() throws CompoundNotFoundException { DNASequence target = new DNASequence("ACTGACGTGTAGCTGACTGA", DNACompoundSet.getDNACompoundSet()); DNASequence query = new DNASequence("ACTGACGTGTAGCTGACTGT", AmbiguityDNACompoundSet.getDNACompoundSet()); SubstitutionMatrix<NucleotideCompound> matrix = SubstitutionMatrixHelper.getNuc4_4(); SimpleGapPenalty gapP = new SimpleGapPenalty(); gapP.setOpenPenalty((short) 5); gapP.setExtensionPenalty((short) 2); try { @SuppressWarnings("unused") SequencePair<DNASequence, NucleotideCompound> psa = Alignments.getPairwiseAlignment(query, target, PairwiseSequenceAlignerType.LOCAL, gapP, matrix); fail("Alignments.getPairwiseAlignment() expected exception with differing compound sets"); } catch (IllegalArgumentException ex) { // expected exception } } /** * @author Daniel Cameron */ public void testMixedCaseInputStringsMatchUnderlyingBases() throws CompoundNotFoundException { DNASequence target = new DNASequence("AAAAAAAAGTC", DNACompoundSet.getDNACompoundSet()); DNASequence query = new DNASequence("aaaaaaaagtc", DNACompoundSet.getDNACompoundSet()); SubstitutionMatrix<NucleotideCompound> matrix = SubstitutionMatrixHelper.getNuc4_4(); SimpleGapPenalty gapP = new SimpleGapPenalty((short)5, (short)2); // should be a full match with +5 per match assertEquals(5.0 * query.getLength(), Alignments.getPairwiseAligner(query, target, PairwiseSequenceAlignerType.LOCAL, gapP, matrix).getScore(), PRECISION); } /** * @author Daniel Cameron */ public void testNoAlignedBases() throws CompoundNotFoundException { DNASequence target = new DNASequence("A", DNACompoundSet.getDNACompoundSet()); DNASequence query = new DNASequence("T", DNACompoundSet.getDNACompoundSet()); SubstitutionMatrix<NucleotideCompound> matrix = SubstitutionMatrixHelper.getNuc4_4(); SimpleGapPenalty gapP = new SimpleGapPenalty((short)0, (short)1); PairwiseSequenceAligner<DNASequence, NucleotideCompound> aligner = Alignments.getPairwiseAligner(query, target, PairwiseSequenceAlignerType.GLOBAL, gapP, matrix); assertEquals(2, aligner.getPair().getLength()); } /** * @author Daniel Cameron */ public void testLinearAlignment() throws CompoundNotFoundException { DNASequence query = new DNASequence("GTAAAAG", DNACompoundSet.getDNACompoundSet()); DNASequence target = new DNASequence("GAAAACGTTTTTTTTTT", DNACompoundSet.getDNACompoundSet()); SubstitutionMatrix<NucleotideCompound> matrix = SubstitutionMatrixHelper.getNuc4_4(); SimpleGapPenalty gapP = new SimpleGapPenalty((short)0, (short)3); PairwiseSequenceAligner<DNASequence, NucleotideCompound> aligner = Alignments.getPairwiseAligner(query, target, PairwiseSequenceAlignerType.GLOBAL, gapP, matrix); assertEquals(String.format("GTAAAA-G----------%nG-AAAACGTTTTTTTTTT%n"), aligner.getPair().toString());; } }