package aliview.importer;

import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import jebl.evolution.io.FastaImporter;
import jebl.evolution.io.ImportException;
import jebl.evolution.io.NexusImporter;
import jebl.evolution.sequences.SequenceType;

import org.apache.log4j.Logger;

import aliview.AliView;
import aliview.MemoryUtils;
import aliview.sequencelist.AlignmentListModel;
import aliview.sequencelist.FileSequenceAlignmentListModel;
import aliview.sequencelist.MemorySequenceAlignmentListModel;
import aliview.sequences.ConvertedJEBLSequence;
import aliview.sequences.Sequence;

/**
 * Builds {@link AlignmentListModel} instances from alignment files or from
 * in-memory Fasta text.
 *
 * <p>Files are imported into memory when the available memory is large enough
 * compared to the file size; otherwise a file-backed model is created.
 */
public class SequencesFactory {

	private static final String LF = System.getProperty("line.separator");
	private static final Logger logger = Logger.getLogger(SequencesFactory.class);

	/** Max memory has to be roughly this many times the file size for an in-memory import. */
	private static final double MEMORY_TO_FILE_SIZE_RATIO = 1.3;

	/** Nexus files below this size (200MB) are imported with the jebl library. */
	private static final long JEBL_NEXUS_MAX_FILE_SIZE = 200 * 1000 * 1000;

	/** Message fragment of the importer exception that signals "switch to file-backed sequences". */
	private static final String TOO_LONG_FOR_MEMORY = "Sequence to long for memory";

	public SequencesFactory() {
	}

	/**
	 * Returns a new list holding the same sequence objects as {@code seqs}.
	 * The copy is shallow: the list is new, the sequences are shared.
	 *
	 * @param seqs the sequences to copy, must not be null
	 * @return a new mutable list with the same elements in the same order
	 */
	// TODO move to Sequences
	public List<Sequence> cloneSequences(List<Sequence> seqs){
		return new ArrayList<Sequence>(seqs);
	}

	/**
	 * @return a new, empty, mutable sequence list
	 */
	public List<Sequence> createEmptyMemorySequencesArrayList(){
		return new ArrayList<Sequence>();
	}

	/**
	 * Creates an alignment model from the given file.
	 *
	 * <p>If max memory is at least ca 1.3 times the file size the sequences are
	 * imported into memory, using the importer that matches the detected file
	 * format. Otherwise (or when the Fasta importer reports that a sequence is
	 * too long for memory) a file-backed model is created.
	 *
	 * @param alignmentFile the alignment file to read
	 * @return the created model; may be {@code null} when the file-backed path
	 *         is taken and the format is unsupported or the model could not be created
	 * @throws AlignmentImportException if the in-memory import found no sequences,
	 *         or if an importer reports an import problem
	 */
	// TODO maybe create a "sequences" container that also keeps track of the longest sequence
	public AlignmentListModel createSequences(File alignmentFile) throws AlignmentImportException{

		boolean memorySequences = fitsInMemory(alignmentFile);
		logger.info("memorySequences=" + memorySequences);

		FileFormat foundFormat = FileFormat.isFileOfAlignmentFormat(alignmentFile);
		AlignmentListModel model = null;

		//
		// In memory sequences
		//
		if(memorySequences){
			logger.info("memorySequence");
			StringBuilder importErrors = new StringBuilder();

			if(foundFormat == FileFormat.FASTA){
				FileReader reader = null;
				try {
					reader = new FileReader(alignmentFile);
					FastFastaImporter fastaImporter = new FastFastaImporter(reader);
					List<Sequence> sequences = fastaImporter.importSequences();
					model = newMemoryModel(sequences, FileFormat.FASTA);
				} catch (FileNotFoundException e) {
					recordFailure(importErrors, "Fasta", e.getMessage(), e);
				} catch (AlignmentImportException aie){
					String message = aie.getMessage();
					if(message != null && message.contains(TOO_LONG_FOR_MEMORY)){
						// fall through to the file-backed model below
						memorySequences = false;
					}
					else{
						// keep the reason so it ends up in the exception thrown below
						recordFailure(importErrors, "Fasta", message, aie);
					}
				} finally {
					closeQuietly(reader);
				}
			}

			if(foundFormat == FileFormat.MSF){
				FileReader reader = null;
				try {
					reader = new FileReader(alignmentFile);
					MSFImporter importer = new MSFImporter(reader);
					List<Sequence> sequences = importer.importSequences();
					model = newMemoryModel(sequences, FileFormat.MSF);
				} catch (FileNotFoundException e) {
					recordFailure(importErrors, "MSF", e.getMessage(), e);
				} finally {
					closeQuietly(reader);
				}
			}

			if(foundFormat == FileFormat.CLUSTAL){
				FileReader reader = null;
				try {
					reader = new FileReader(alignmentFile);
					ClustalImporter importer = new ClustalImporter(reader, alignmentFile.length());
					List<Sequence> sequences = importer.importSequences();
					model = newMemoryModel(sequences, FileFormat.CLUSTAL);
				} catch (FileNotFoundException e) {
					recordFailure(importErrors, "CLUSTAL", e.getMessage(), e);
				} finally {
					closeQuietly(reader);
				}
			}

			if(foundFormat == FileFormat.PHYLIP){
				model = importPhylipIntoMemory(alignmentFile, importErrors);
			}

			if(foundFormat == FileFormat.NEXUS){
				model = importNexusIntoMemory(alignmentFile, importErrors);
			}

			// memorySequences may have been switched off by the Fasta importer above
			if(memorySequences){
				if(foundFormat == null || model == null || model.getSize() == 0){
					// still nothing
					throw new AlignmentImportException("Could not find sequences in file: " + alignmentFile + LF + importErrors);
				}
			}
		}

		//
		// FILE SEQUENCES
		//
		if(!memorySequences){
			if(foundFormat == FileFormat.FASTA ||
			   foundFormat == FileFormat.PHYLIP ||
			   foundFormat == FileFormat.NEXUS ||
			   foundFormat == FileFormat.CLUSTAL ||
			   foundFormat == FileFormat.MSF){
				try{
					model = new FileSequenceAlignmentListModel(alignmentFile, foundFormat);
					logger.info(model.getFileFormat());
				} catch (Exception e) {
					logger.error("Could not create file backed sequences for: " + alignmentFile, e);
				}
			}
			else{
				// no supported large file format
				logger.warn("No supported large file format for: " + alignmentFile);
			}
		}

		// could still be null
		return model;
	}

	/**
	 * Creates an in-memory alignment model from Fasta formatted text.
	 *
	 * <p>Import problems are logged and not rethrown; in that case the returned
	 * model has had neither sequences nor file format set.
	 *
	 * @param stringReader reader positioned at the start of the Fasta text
	 * @return a new in-memory model, never {@code null}
	 * @throws AlignmentImportException declared for callers; this implementation
	 *         logs import problems instead of throwing them
	 */
	public AlignmentListModel createFastaSequences(StringReader stringReader) throws AlignmentImportException {
		AlignmentListModel model = new MemorySequenceAlignmentListModel();
		try {
			FastFastaImporter fastaImporter = new FastFastaImporter(stringReader);
			model.setSequences(fastaImporter.importSequences());
			model.setFileFormat(FileFormat.FASTA);
		} catch (Exception e) {
			logger.error("Could not import Fasta sequences from string", e);
		}
		return model;
	}

	/**
	 * Decides whether the file is small enough, compared to max memory, to be
	 * imported into memory. A {@code null} or non-existing file counts as fitting.
	 */
	private static boolean fitsInMemory(File alignmentFile){
		if(alignmentFile == null || !alignmentFile.exists()){
			return true;
		}
		double fileSize = alignmentFile.length();
		double maxMem = MemoryUtils.getMaxMem();
		logger.info("maxMem" + maxMem);
		logger.info("fileSize" + fileSize);

		// memory needs to be ca 1.3 times the file size
		if(maxMem/fileSize < MEMORY_TO_FILE_SIZE_RATIO){
			logger.info("maxMem/fileSize=" + maxMem/fileSize);
			return false;
		}
		return true;
	}

	/**
	 * Tries the Phylip variants one after another and returns the model of the
	 * first one that imports without error, or {@code null} if all of them fail.
	 * Each failure is appended to {@code importErrors}.
	 */
	private static AlignmentListModel importPhylipIntoMemory(File alignmentFile, StringBuilder importErrors){
		// order matters: the importer throws when the file is not of the requested variant
		FileFormat[] variants = {
			FileFormat.PHYLIP_RELAXED_PADDED_AKA_LONG_NAME_SEQUENTIAL,
			FileFormat.PHYLIP_RELAXED_PADDED_INTERLEAVED_AKA_LONG_NAME_INTERLEAVED,
			FileFormat.PHYLIP_STRICT_SEQUENTIAL_AKA_SHORT_NAME_SEQUENTIAL,
			FileFormat.PHYLIP_SHORT_NAME_INTERLEAVED
		};
		String[] attemptLogs = {
			null,
			"try LONG_NAME_INTERLEAVED",
			"try short name sequential",
			"try short name interleaved"
		};

		for(int i = 0; i < variants.length; i++){
			if(attemptLogs[i] != null){
				logger.info(attemptLogs[i]);
			}
			FileReader reader = null;
			try {
				reader = new FileReader(alignmentFile);
				PhylipImporter phylipImporter = new PhylipImporter(reader, variants[i]);
				List<Sequence> sequences = phylipImporter.importSequences();
				// only the strict sequential attempt reports the importer's own format
				FileFormat modelFormat = FileFormat.PHYLIP;
				if(variants[i] == FileFormat.PHYLIP_STRICT_SEQUENTIAL_AKA_SHORT_NAME_SEQUENTIAL){
					modelFormat = phylipImporter.getFileFormat();
				}
				return newMemoryModel(sequences, modelFormat);
			} catch (Exception e) {
				recordFailure(importErrors, "Phylip", e.getMessage(), e);
			} finally {
				closeQuietly(reader);
			}
		}
		return null;
	}

	/**
	 * Imports a Nexus file into memory: with the jebl library for files below
	 * 200MB, otherwise with {@code FastNexusImporterSlow}. Returns {@code null}
	 * when nothing could be imported; failures are appended to {@code importErrors}.
	 */
	private AlignmentListModel importNexusIntoMemory(File alignmentFile, StringBuilder importErrors){
		long fileSize = alignmentFile.length();

		if(fileSize < JEBL_NEXUS_MAX_FILE_SIZE){
			FileReader reader = null;
			try{
				reader = new FileReader(alignmentFile);
				NexusImporter importer = new NexusImporter(reader);
				List<jebl.evolution.sequences.Sequence> jeblSequences = importer.importSequences();
				if(jeblSequences != null && jeblSequences.size() > 0){
					return newMemoryModel(convertJEBLSequences(jeblSequences), FileFormat.NEXUS);
				}
			}catch (ImportException impExc) {
				recordFailure(importErrors, "Nexus", impExc.userMessage(), impExc);
			}catch (Exception e) {
				recordFailure(importErrors, "Nexus", e.getMessage(), e);
			} finally {
				closeQuietly(reader);
			}
			return null;
		}

		try{
			FastNexusImporterSlow importer = new FastNexusImporterSlow(alignmentFile);
			List<Sequence> sequences = importer.importSequences();
			return newMemoryModel(sequences, FileFormat.NEXUS);
		} catch (Exception e) {
			recordFailure(importErrors, "Nexus", e.getMessage(), e);
		}
		return null;
	}

	/** Creates an in-memory model holding the given sequences and file format. */
	private static AlignmentListModel newMemoryModel(List<Sequence> sequences, FileFormat format){
		AlignmentListModel model = new MemorySequenceAlignmentListModel();
		model.setSequences(sequences);
		model.setFileFormat(format);
		return model;
	}

	/** Appends one "Tried import as ... but: ..." line to the error summary and logs it with its cause. */
	private static void recordFailure(StringBuilder importErrors, String formatName, String detail, Throwable cause){
		String line = "Tried import as " + formatName + " but: " + detail;
		importErrors.append(line).append(LF);
		// pass the throwable separately so that log4j prints the stack trace
		logger.error(line, cause);
	}

	/**
	 * Closes the resource if it is not null; a failing close is logged only.
	 * NOTE(review): assumes the importers have read everything they need once
	 * importSequences() has returned - confirm for lazily loading importers.
	 */
	private static void closeQuietly(Closeable resource){
		if(resource == null){
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			logger.warn("Could not close alignment file reader", e);
		}
	}

	/** Wraps every jebl sequence in an AliView sequence, keeping the order. */
	private List<Sequence> convertJEBLSequences(List<jebl.evolution.sequences.Sequence> jeblSequences) {
		List<Sequence> sequences = new ArrayList<Sequence>(jeblSequences.size());
		for(jebl.evolution.sequences.Sequence jeblSequence: jeblSequences){
			// Create sequences by wrapping jebl-sequences in AliView-sequences
			sequences.add(new ConvertedJEBLSequence(jeblSequence));
		}
		return sequences;
	}
}