// $Id: PhylogeneticInferrer.java,v 1.17 2010/10/13 21:12:18 cmzmasek Exp $
// forester -- software libraries and applications
// for genomics and evolutionary biology research.
//
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: cmzmasek@yahoo.com
// WWW: www.phylosoft.org/forester
package org.forester.archaeopteryx;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.swing.JOptionPane;
import org.forester.evoinference.distance.NeighborJoining;
import org.forester.evoinference.distance.PairwiseDistanceCalculator;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.evoinference.tools.BootstrapResampler;
import org.forester.io.parsers.FastaParser;
import org.forester.io.writers.SequenceWriter;
import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
import org.forester.msa.BasicMsa;
import org.forester.msa.MafftOLD;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
import org.forester.msa.MsaInferrer;
import org.forester.msa.MsaTools;
import org.forester.msa.ResampleableMsa;
import org.forester.phylogeny.Phylogeny;
import org.forester.sequence.Sequence;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.ForesterUtil;
public class PhylogeneticInferrer implements Runnable {
private Msa _msa;
private final MainFrameApplication _mf;
private final PhylogeneticInferenceOptions _options;
private final List<Sequence> _seqs;
public final static String MSA_FILE_SUFFIX = ".aln";
public final static String PWD_FILE_SUFFIX = ".pwd";
public PhylogeneticInferrer( final List<Sequence> seqs,
final PhylogeneticInferenceOptions options,
final MainFrameApplication mf ) {
_msa = null;
_seqs = seqs;
_mf = mf;
_options = options;
}
public PhylogeneticInferrer( final Msa msa,
final PhylogeneticInferenceOptions options,
final MainFrameApplication mf ) {
_msa = msa;
_seqs = null;
_mf = mf;
_options = options;
}
private Msa inferMsa() throws IOException {
File temp_seqs_file = File.createTempFile("aptx", ".fasta");
System.out.println( "temp file: " + temp_seqs_file );
//final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
writer.close();
final List<String> opts = new ArrayList<String>();
opts.add( "--maxiterate" );
opts.add( "1000" );
opts.add( "--localpair" );
opts.add( "--quiet" );
Msa msa = null;
try {
msa = runMAFFT( temp_seqs_file, opts );
}
catch ( final InterruptedException e ) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// copy aln file to intermediate dir file
// delete temp seqs file
return msa;
}
private Phylogeny inferPhylogeny( final Msa msa ) {
BasicSymmetricalDistanceMatrix m = null;
switch ( _options.getPwdDistanceMethod() ) {
case KIMURA_DISTANCE:
m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
break;
case POISSON_DISTANCE:
m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
break;
case FRACTIONAL_DISSIMILARITY:
m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
break;
default:
throw new IllegalStateException( "invalid pwd method" );
}
if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
BufferedWriter pwd_writer;
try {
pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase() + PWD_FILE_SUFFIX ) );
m.write( pwd_writer );
pwd_writer.close();
}
catch ( final IOException e ) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
final NeighborJoining nj = new NeighborJoining();
final Phylogeny phy = nj.execute( m );
FastaParser.extractFastaInformation( phy );
return phy;
}
private void infer() {
//_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
if ( ( _msa == null ) && ( _seqs == null ) ) {
throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
}
if ( _msa == null ) {
Msa msa = null;
try {
msa = inferMsa();
}
catch ( final IOException e ) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println( msa.toString() );
System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
MsaTools msa_tools = MsaTools.createInstance();
msa = msa_tools.removeGapColumns( 0.5, 50, msa );
System.out.println( msa_tools.getIgnoredSequenceIds());
System.out.println( msa.toString() );
System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
_msa = msa;
}
final int n = _options.getBootstrapSamples();
final long seed = _options.getRandomNumberGeneratorSeed();
final Phylogeny master_phy = inferPhylogeny( _msa );
if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
.getLength(), n, seed );
final Phylogeny[] eval_phys = new Phylogeny[ n ];
for( int i = 0; i < n; ++i ) {
resampleable_msa.resample( resampled_column_positions[ i ] );
eval_phys[ i ] = inferPhylogeny( resampleable_msa );
}
ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
}
_mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
_mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
JOptionPane.showMessageDialog( _mf,
"NJ successfully completed",
"Inference Completed",
JOptionPane.INFORMATION_MESSAGE );
}
@Override
public void run() {
infer();
}
private Msa runMAFFT( final File input_seqs, final List<String> opts ) throws IOException, InterruptedException {
Msa msa = null;
final MsaInferrer mafft = Mafft.createInstance( "/usr/local/bin/mafft" );
try {
msa = mafft.infer( input_seqs, opts );
}
catch ( final IOException e ) {
System.out.println( mafft.getErrorDescription() );
}
return msa;
}
private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) {
if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
try {
final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options
.getIntermediateFilesBase()
+ MSA_FILE_SUFFIX ) );
_msa.write( msa_writer );
msa_writer.close();
final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options
.getIntermediateFilesBase()
+ PWD_FILE_SUFFIX ) );
m.write( pwd_writer );
pwd_writer.close();
}
catch ( final Exception e ) {
System.out.println( "Error: " + e.getMessage() );
}
}
}
}