/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.alignment;
import org.biojava.nbio.core.alignment.matrices.SimpleSubstitutionMatrix;
import org.biojava.nbio.alignment.Alignments.PairwiseSequenceScorerType;
import org.biojava.nbio.alignment.Alignments.ProfileProfileAlignerType;
import org.biojava.nbio.alignment.template.GapPenalty;
import org.biojava.nbio.alignment.template.PairwiseSequenceScorer;
import org.biojava.nbio.core.alignment.template.Profile;
import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
import org.biojava.nbio.core.util.ConcurrencyTools;
import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
public class CookbookMSAProfiler {
private static class Profiler {
private long maxMemoryUsed, timeCheckpoint;
private final long timeStart;
private Profiler() {
maxMemoryUsed = Runtime.getRuntime().totalMemory();
timeStart = timeCheckpoint = System.nanoTime();
}
private long getMaxMemoryUsed() {
return maxMemoryUsed = Math.max(maxMemoryUsed, Runtime.getRuntime().totalMemory());
}
private long getTimeSinceCheckpoint() {
return System.nanoTime() - timeCheckpoint;
}
private long getTimeSinceStart() {
return System.nanoTime() - timeStart;
}
private void setCheckpoint() {
maxMemoryUsed = Math.max(maxMemoryUsed, Runtime.getRuntime().totalMemory());
timeCheckpoint = System.nanoTime();
}
}
public static void main(String[] args) throws Exception {
if (args.length < 1) {
System.err.println("The first argument must be a fasta file of protein sequences.");
return;
}
// ConcurrencyTools.setThreadPoolSingle();
PrintStream fout = new PrintStream("msa.txt");
Profiler profiler = new Profiler();
System.out.printf("Loading sequences from %s... ", args[0]);
List<ProteinSequence> list = new ArrayList<ProteinSequence>();
list.addAll(FastaReaderHelper.readFastaProteinSequence(new File(args[0])).values());
if (args.length > 1 && Integer.parseInt(args[1]) < list.size()) {
System.out.printf("%s/%d", args[1], list.size());
list = list.subList(0, Integer.parseInt(args[1]));
} else {
System.out.printf("%d", list.size());
}
System.out.printf(" sequences in %d ms using %d kB%n%n", profiler.getTimeSinceCheckpoint()/1000000,
profiler.getMaxMemoryUsed()/1024);
profiler.setCheckpoint();
System.out.print("Stage 1: pairwise similarity calculation... ");
GapPenalty gaps = new SimpleGapPenalty();
SubstitutionMatrix<AminoAcidCompound> blosum62 = SimpleSubstitutionMatrix.getBlosum62();
List<PairwiseSequenceScorer<ProteinSequence, AminoAcidCompound>> scorers = Alignments.getAllPairsScorers(list,
PairwiseSequenceScorerType.GLOBAL_IDENTITIES, gaps, blosum62);
Alignments.runPairwiseScorers(scorers);
System.out.printf("%d scores in %d ms using %d kB%n%n", scorers.size(),
profiler.getTimeSinceCheckpoint()/1000000, profiler.getMaxMemoryUsed()/1024);
profiler.setCheckpoint();
System.out.print("Stage 2: hierarchical clustering into a guide tree... ");
GuideTree<ProteinSequence, AminoAcidCompound> tree = new GuideTree<ProteinSequence, AminoAcidCompound>(list,
scorers);
scorers = null;
System.out.printf("%d ms using %d kB%n%n%s%n%n", profiler.getTimeSinceCheckpoint()/1000000,
profiler.getMaxMemoryUsed()/1024, tree);
profiler.setCheckpoint();
System.out.print("Stage 3: progressive alignment... ");
Profile<ProteinSequence, AminoAcidCompound> msa = Alignments.getProgressiveAlignment(tree,
ProfileProfileAlignerType.GLOBAL, gaps, blosum62);
System.out.printf("%d profile-profile alignments in %d ms using %d kB%n%n", list.size() - 1,
profiler.getTimeSinceCheckpoint()/1000000, profiler.getMaxMemoryUsed()/1024);
fout.print(msa);
fout.close();
ConcurrencyTools.shutdown();
System.out.printf("Total time: %d ms%nMemory use: %d kB%n", profiler.getTimeSinceStart()/1000000,
profiler.getMaxMemoryUsed()/1024);
}
}