/* * BEAUTiImporter.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.app.beauti.util; import dr.app.beauti.BeautiFrame; import dr.app.beauti.mcmcpanel.MCMCPanel; import dr.app.beauti.options.*; import dr.app.util.Utils; import dr.evolution.alignment.Alignment; import dr.evolution.alignment.Patterns; import dr.evolution.alignment.SimpleAlignment; import dr.evolution.datatype.DataType; import dr.evolution.datatype.Microsatellite; import dr.evolution.datatype.Nucleotides; import dr.evolution.io.FastaImporter; import dr.evolution.io.Importer.ImportException; import dr.evolution.io.MicroSatImporter; import dr.evolution.io.NexusImporter; import dr.evolution.io.NexusImporter.MissingBlockException; import dr.evolution.io.NexusImporter.NexusBlock; import dr.evolution.tree.Tree; import dr.evolution.util.Taxa; import dr.evolution.util.Taxon; import dr.evolution.util.TaxonList; import dr.evolution.util.Units; import dr.util.DataTable; import org.jdom.JDOMException; import javax.swing.*; import java.awt.*; import java.io.*; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.List; /** * @author Andrew Rambaut * @author Walter Xie * @version $Id$ */ public class BEAUTiImporter { private final BeautiOptions options; private final BeautiFrame frame; private PartitionNameDialog partitionNameDialog = null; public BEAUTiImporter(BeautiFrame frame, BeautiOptions options) { this.frame = frame; this.options = options; } public void importFromFile(File file) throws IOException, ImportException, JDOMException { try { Reader reader = new FileReader(file); BufferedReader bufferedReader = new BufferedReader(reader); String line = bufferedReader.readLine(); while (line != null && line.length() == 0) { line = bufferedReader.readLine(); } if ((line != null && line.toUpperCase().contains("#NEXUS"))) { // is a NEXUS file importNexusFile(file); } else if ((line != null && line.trim().startsWith("" + FastaImporter.FASTA_FIRST_CHAR))) { // is a FASTA file importFastaFile(file); } else if ((line != null && (line.toUpperCase().contains("<?XML") || line.toUpperCase().contains("<BEAST")))) { // assume it is a BEAST XML file and see if that works... importBEASTFile(file); // } else { // // assume it is a tab-delimited traits file and see if that works... // importTraits(file); } else if ((line != null && line.toUpperCase().contains("#MICROSAT"))) { // MicroSatellite importMicroSatFile(file); } else { throw new ImportException("Unrecognized format for imported file."); } bufferedReader.close(); } catch (IOException e) { throw new IOException(e.getMessage()); } } // micro-sat private void importMicroSatFile(File file) throws IOException, ImportException { try { Reader reader = new FileReader(file); BufferedReader bufferedReader = new BufferedReader(reader); MicroSatImporter importer = new MicroSatImporter(bufferedReader); List<Patterns> microsatPatList = importer.importPatterns(); Taxa unionSetTaxonList = importer.getUnionSetTaxonList(); Microsatellite microsatellite = importer.getMicrosatellite(); // options.allowDifferentTaxa = importer.isHasDifferentTaxon(); bufferedReader.close(); PartitionSubstitutionModel substModel = new PartitionSubstitutionModel(options, microsatPatList.get(0).getId()); substModel.setMicrosatellite(microsatellite); for (Patterns patterns : microsatPatList) { setData(file.getName(), unionSetTaxonList, patterns, substModel, null); } // has to call after data is imported options.microsatelliteOptions.initModelParametersAndOpererators(); } catch (ImportException e) { throw new ImportException(e.getMessage()); } catch (IOException e) { throw new IOException(e.getMessage()); } } // xml private void importBEASTFile(File file) throws IOException, ImportException, JDOMException { try { FileReader reader = new FileReader(file); BeastImporter importer = new BeastImporter(reader); List<TaxonList> taxonLists = new ArrayList<TaxonList>(); List<Alignment> alignments = new ArrayList<Alignment>(); importer.importBEAST(taxonLists, alignments); TaxonList taxa = taxonLists.get(0); int count = 1; for (Alignment alignment : alignments) { String name = file.getName(); if (alignment.getId() != null && alignment.getId().length() > 0) { name = alignment.getId(); } else { if (alignments.size() > 1) { name += count; } } setData(name, taxa, alignment, null, null, null, null); count++; } // assume that any additional taxon lists are taxon sets... for (int i = 1; i < taxonLists.size(); i++) { Taxa taxonSet = (Taxa) taxonLists.get(i); options.taxonSets.add(taxonSet); options.taxonSetsMono.put(taxonSet, false); options.taxonSetsIncludeStem.put(taxonSet, false); options.taxonSetsTreeModel.put(taxonSet, options.getPartitionTreeModels().get(0)); } reader.close(); } catch (JDOMException e) { throw new JDOMException(e.getMessage()); } catch (ImportException e) { throw new ImportException(e.getMessage()); } catch (IOException e) { throw new IOException(e.getMessage()); } } // nexus private void importNexusFile(File file) throws IOException, ImportException { TaxonList taxa = null; SimpleAlignment alignment = null; List<Tree> trees = new ArrayList<Tree>(); PartitionSubstitutionModel model = null; List<NexusApplicationImporter.CharSet> charSets = new ArrayList<NexusApplicationImporter.CharSet>(); try { FileReader reader = new FileReader(file); NexusApplicationImporter importer = new NexusApplicationImporter(reader); boolean done = false; while (!done) { try { NexusBlock block = importer.findNextBlock(); if (block == NexusImporter.TAXA_BLOCK) { if (taxa != null) { throw new MissingBlockException("TAXA block already defined"); } taxa = importer.parseTaxaBlock(); } else if (block == NexusImporter.CALIBRATION_BLOCK) { if (taxa == null) { throw new MissingBlockException("TAXA or DATA block must be defined before a CALIBRATION block"); } importer.parseCalibrationBlock(taxa); } else if (block == NexusImporter.CHARACTERS_BLOCK) { if (taxa == null) { throw new MissingBlockException("TAXA block must be defined before a CHARACTERS block"); } if (alignment != null) { throw new MissingBlockException("CHARACTERS or DATA block already defined"); } alignment = (SimpleAlignment) importer.parseCharactersBlock(taxa); } else if (block == NexusImporter.DATA_BLOCK) { if (alignment != null) { throw new MissingBlockException("CHARACTERS or DATA block already defined"); } // A data block doesn't need a taxon block before it // but if one exists then it will use it. alignment = (SimpleAlignment) importer.parseDataBlock(taxa); if (taxa == null) { taxa = alignment; } } else if (block == NexusImporter.TREES_BLOCK) { // I guess there is no reason not to allow multiple trees blocks // if (trees.size() > 0) { // throw new MissingBlockException("TREES block already defined"); // } Tree[] treeArray = importer.parseTreesBlock(taxa); trees.addAll(Arrays.asList(treeArray)); if (taxa == null && trees.size() > 0) { taxa = trees.get(0); } } else if (block == NexusApplicationImporter.PAUP_BLOCK) { model = importer.parsePAUPBlock(options, charSets); } else if (block == NexusApplicationImporter.MRBAYES_BLOCK) { model = importer.parseMrBayesBlock(options, charSets); } else if (block == NexusApplicationImporter.ASSUMPTIONS_BLOCK || block == NexusApplicationImporter.SETS_BLOCK) { importer.parseAssumptionsBlock(charSets); } else { // Ignore the block.. } } catch (EOFException ex) { done = true; } } reader.close(); // Allow the user to load taxa only (perhaps from a tree file) so that they can sample from a prior... if (alignment == null && taxa == null) { throw new MissingBlockException("TAXON, DATA or CHARACTERS block is missing"); } } catch (IOException e) { throw new IOException(e.getMessage()); } catch (ImportException e) { throw new ImportException(e.getMessage()); // } catch (Exception e) { // throw new Exception(e.getMessage()); } setData(file.getName(), taxa, alignment, charSets, model, null, trees); } // FASTA private void importFastaFile(File file) throws IOException, ImportException { try { FileReader reader = new FileReader(file); FastaImporter importer = new FastaImporter(reader, Nucleotides.INSTANCE); Alignment alignment = importer.importAlignment(); reader.close(); setData(file.getName(), alignment, alignment, null, null, null, null); } catch (ImportException e) { throw new ImportException(e.getMessage()); } catch (IOException e) { throw new IOException(e.getMessage()); } } private boolean isMissingValue(String value) { return (value.equals("?") || value.equals("NA") || value.length() == 0); } public void importTraits(final File file) throws Exception { List<TraitData> importedTraits = new ArrayList<TraitData>(); Taxa taxa = options.taxonList; DataTable<String[]> dataTable = DataTable.Text.parse(new FileReader(file)); String[] traitNames = dataTable.getColumnLabels(); String[] taxonNames = dataTable.getRowLabels(); for (int i = 0; i < dataTable.getColumnCount(); i++) { boolean warningGiven = false; String traitName = traitNames[i]; String[] values = dataTable.getColumn(i); Class c = null; if (!isMissingValue(values[0])) { c = Utils.detectType(values[0]); } for (int j = 1; j < values.length; j++) { if (!isMissingValue(values[j])) { if (c == null) { c = Utils.detectType(values[j]); } else { Class c1 = Utils.detectType(values[j]); if (c == Integer.class && c1 == Double.class) { // change the type to double c = Double.class; } if (c1 != c && !(c == Double.class && c1 == Integer.class) && !warningGiven ) { JOptionPane.showMessageDialog(frame, "Not all values of same type for trait" + traitName, "Incompatible values", JOptionPane.WARNING_MESSAGE); warningGiven = true; } } } } TraitData.TraitType t = (c == Boolean.class || c == String.class || c == null) ? TraitData.TraitType.DISCRETE : (c == Integer.class) ? TraitData.TraitType.INTEGER : TraitData.TraitType.CONTINUOUS; TraitData newTrait = new TraitData(options, traitName, file.getName(), t); if (validateTraitName(traitName)) { importedTraits.add(newTrait); } int j = 0; for (final String taxonName : taxonNames) { final int index = taxa.getTaxonIndex(taxonName); Taxon taxon; if (index >= 0) { taxon = taxa.getTaxon(index); } else { taxon = new Taxon(taxonName); taxa.addTaxon(taxon); } if (!isMissingValue(values[j])) { taxon.setAttribute(traitName, Utils.constructFromString(c, values[j])); } else { // AR - merge rather than replace existing trait values if (taxon.getAttribute(traitName) == null) { taxon.setAttribute(traitName, "?"); } } j++; } } setData(file.getName(), taxa, null, null, null, importedTraits, null); } public boolean validateTraitName(String traitName) { // check that the name is valid if (traitName.trim().length() == 0) { Toolkit.getDefaultToolkit().beep(); return false; } // disallow a trait called 'date' if (traitName.equalsIgnoreCase("date")) { JOptionPane.showMessageDialog(frame, "This trait name has a special meaning. Use the 'Tip Date' panel\n" + " to set dates for taxa.", "Reserved trait name", JOptionPane.WARNING_MESSAGE); return false; } // check that the trait name doesn't exist if (options.traitExists(traitName)) { int option = JOptionPane.showConfirmDialog(frame, "A trait of this name already exists. Do you wish to replace\n" + "it with this new trait? This may result in the loss or change\n" + "in trait values for the taxa.", "Overwrite trait?", JOptionPane.YES_NO_OPTION, JOptionPane.WARNING_MESSAGE); if (option == JOptionPane.NO_OPTION) { return false; } } return true; } // for Alignment private void setData(String fileName, TaxonList taxonList, Alignment alignment, List<NexusApplicationImporter.CharSet> charSets, PartitionSubstitutionModel model, List<TraitData> traits, List<Tree> trees) throws ImportException, IllegalArgumentException { String fileNameStem = Utils.trimExtensions(fileName, new String[]{"NEX", "NEXUS", "FA", "FAS", "FASTA", "TRE", "TREE", "XML", "TXT"}); if (options.fileNameStem == null || options.fileNameStem.equals(MCMCPanel.DEFAULT_FILE_NAME_STEM)) { options.fileNameStem = fileNameStem; } addTaxonList(taxonList); addAlignment(alignment, charSets, model, fileName, fileNameStem); addTraits(traits); addTrees(trees); } // for Patterns private void setData(String fileName, Taxa taxonList, Patterns patterns, PartitionSubstitutionModel model, List<TraitData> traits ) throws ImportException, IllegalArgumentException { String fileNameStem = Utils.trimExtensions(fileName, new String[]{"NEX", "NEXUS", "FA", "FAS", "FASTA", "TRE", "TREE", "XML", "TXT"}); if (options.fileNameStem == null || options.fileNameStem.equals(MCMCPanel.DEFAULT_FILE_NAME_STEM)) { options.fileNameStem = fileNameStem; } addTaxonList(taxonList); addPatterns(patterns, model, fileName); addTraits(traits); } private void addTaxonList(TaxonList taxonList) throws ImportException { checkTaxonList(taxonList); if (options.taxonList == null) { // This is the first partition to be loaded... options.taxonList = new Taxa(taxonList); } else { // otherwise just add the new ones... for (Taxon taxon : taxonList) { if (!options.taxonList.contains(taxon)) { options.taxonList.addTaxon(taxon); } } } } private void checkTaxonList(TaxonList taxonList) throws ImportException { // check the taxon names for invalid characters boolean foundAmp = false; for (Taxon taxon : taxonList) { String name = taxon.getId(); if (name.indexOf('&') >= 0) { foundAmp = true; } } if (foundAmp) { throw new ImportException("One or more taxon names include an illegal character ('&').\n" + "These characters will prevent BEAST from reading the resulting XML file.\n\n" + "Please edit the taxon name(s) before reloading the data file."); } // make sure they all have dates... for (int i = 0; i < taxonList.getTaxonCount(); i++) { if (taxonList.getTaxonAttribute(i, "date") == null) { Date origin = new Date(0); dr.evolution.util.Date date = dr.evolution.util.Date.createTimeSinceOrigin(0.0, Units.Type.YEARS, origin); taxonList.getTaxon(i).setAttribute("date", date); } } } private void addAlignment(Alignment alignment, List<NexusApplicationImporter.CharSet> charSets, PartitionSubstitutionModel model, String fileName, String fileNameStem) { if (alignment != null) { List<AbstractPartitionData> partitions = new ArrayList<AbstractPartitionData>(); if (charSets != null && charSets.size() > 0) { for (NexusApplicationImporter.CharSet charSet : charSets) { partitions.add(new PartitionData(options, charSet.name, fileName, charSet.constructCharSetAlignment(alignment))); } } else { partitions.add(new PartitionData(options, fileNameStem, fileName, alignment)); } createPartitionFramework(model, partitions); } } private void addPatterns(Patterns patterns, PartitionSubstitutionModel model, String fileName) { if (patterns != null) { List<AbstractPartitionData> partitions = new ArrayList<AbstractPartitionData>(); partitions.add(new PartitionPattern(options, patterns.getId(), fileName, patterns)); createPartitionFramework(model, partitions); } } private void createPartitionFramework(PartitionSubstitutionModel model, List<AbstractPartitionData> partitions) { for (AbstractPartitionData partition : partitions) { String name = partition.getName(); while (name.length() == 0 || options.hasPartitionData(name)) { String text; if (options.hasPartitionData(name)) { text = "<html>" + "A partition named, " + name + ", already exists.<br>" + "Please provide a unique name for this partition." + "</html>"; } else { text = "<html>" + "Invalid partition name. Please provide a unique<br>" + "name for this partition." + "</html>"; } if (partitionNameDialog == null) { partitionNameDialog = new PartitionNameDialog(frame); } partitionNameDialog.setDescription(text); int result = partitionNameDialog.showDialog(); if (result == -1 || result == JOptionPane.CANCEL_OPTION) { return; } name = partitionNameDialog.getName(); } partition.setName(name); options.dataPartitions.add(partition); if (model != null) { setSubstModel(partition, model); setClockAndTree(partition);//TODO Cannot load Clock Model and Tree Model from BEAST file yet } else {// only this works if (options.getPartitionSubstitutionModels(partition.getDataType()).size() < 1) {// use same substitution model in beginning // PartitionSubstitutionModel based on PartitionData PartitionSubstitutionModel psm = new PartitionSubstitutionModel(options, partition); partition.setPartitionSubstitutionModel(psm); } else { //if (options.getPartitionSubstitutionModels() != null) { // && options.getPartitionSubstitutionModels().size() == 1) { PartitionSubstitutionModel psm = options.getPartitionSubstitutionModels(partition.getDataType()).get(0); setSubstModel(partition, psm); } setClockAndTree(partition); } } options.updatePartitionAllLinks(); //options.clockModelOptions.initClockModelGroup(); } private void setClockAndTree(AbstractPartitionData partition) { // use same clock model in beginning, have to create after partition.setPartitionTreeModel(ptm); if (options.getPartitionClockModels(partition.getDataType()).size() < 1) { // PartitionClockModel based on PartitionData PartitionClockModel pcm = new PartitionClockModel(options, partition); partition.setPartitionClockModel(pcm); } else { //if (options.getPartitionClockModels() != null) { // && options.getPartitionClockModels().size() == 1) { PartitionClockModel pcm; if (partition.getDataType().getType() == DataType.MICRO_SAT) { pcm = new PartitionClockModel(options, partition); } else { // make sure in the same data type pcm = options.getPartitionClockModels(partition.getDataType()).get(0); } partition.setPartitionClockModel(pcm); } // use same tree model and same tree prior in beginning if (options.getPartitionTreeModels().size() < 1) { // PartitionTreeModel based on PartitionData PartitionTreeModel ptm = new PartitionTreeModel(options, partition); partition.setPartitionTreeModel(ptm); // PartitionTreePrior always based on PartitionTreeModel PartitionTreePrior ptp = new PartitionTreePrior(options, ptm); ptm.setPartitionTreePrior(ptp); } else { //if (options.getPartitionTreeModels() != null) { // && options.getPartitionTreeModels().size() == 1) { PartitionTreeModel ptm; if (partition.getDataType().getType() == DataType.MICRO_SAT) { ptm = new PartitionTreeModel(options, partition); // different tree model, PartitionTreePrior ptp = options.getPartitionTreePriors().get(0); // but same tree prior ptm.setPartitionTreePrior(ptp); } else { ptm = options.getPartitionTreeModels().get(0); // same tree model, } partition.setPartitionTreeModel(ptm); // if same tree model, therefore same prior } } private void setSubstModel(AbstractPartitionData partition, PartitionSubstitutionModel psm) { partition.setPartitionSubstitutionModel(psm); if (psm.getDataType().getType() != partition.getDataType().getType()) throw new IllegalArgumentException("Partition " + partition.getName() + "\ncannot assign to Substitution Model\n" + psm.getName() + "\nwith different data type."); } private void addTraits(List<TraitData> traits) { if (traits != null) { for (TraitData trait : traits) { options.addTrait(trait); } options.updatePartitionAllLinks(); } } private void addTrees(List<Tree> trees) { if (trees != null && trees.size() > 0) { for (Tree tree : trees) { String id = tree.getId(); if (id == null || id.trim().length() == 0) { tree.setId("tree_" + (options.userTrees.size() + 1)); } else { String newId = id; int count = 1; for (Tree tree1 : options.userTrees) { if (tree1.getId().equals(newId)) { newId = id + "_" + count; count++; } } tree.setId(newId); } options.userTrees.add(tree); } } } }