package es.uvigo.darwin.prottest.util.fileio; /* * NexusImporter.java * * (c) 2002-2005 JEBL development team * * This package may be distributed under the * Lesser Gnu Public Licence (LGPL) */ import es.uvigo.darwin.prottest.util.attributable.Attributable; import es.uvigo.darwin.prottest.taxa.Taxon; import es.uvigo.darwin.prottest.tree.TreeUtils; import es.uvigo.darwin.prottest.util.exception.ImportException; import pal.tree.SimpleTree; import pal.tree.Tree; import java.awt.*; import java.io.EOFException; import java.io.IOException; import java.io.Reader; import java.io.StringWriter; import java.io.Writer; import java.util.*; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import pal.misc.Identifier; import pal.tree.Node; import pal.tree.NodeFactory; /** * Class for importing NEXUS file format * * @version $Id: NexusImporter.java 723 2007-06-11 05:40:44Z matt_kearse $ * * @author Andrew Rambaut * @author Alexei Drummond */ public class NexusImporter { public enum NexusBlock { UNKNOWN, TAXA, CHARACTERS, DATA, UNALIGNED, DISTANCES, TREES } private boolean compactTrees = false; private Writer commentWriter; private String nexusId; public String getNexusId() { return nexusId; } // NEXUS specific ImportException classes public static class MissingBlockException extends ImportException { public MissingBlockException() { super(); } public MissingBlockException(String message) { super(message); } } /** * Constructor */ public NexusImporter(Reader reader, long expectedLength) { helper = new ImportHelper(reader); helper.setExpectedInputLength(expectedLength); initHelper(); } /** * Constructor */ public NexusImporter(Reader reader) { this(reader, 0); } public NexusImporter(Reader reader, boolean compactTrees, long expectedInputLength) { this(reader, expectedInputLength); this.compactTrees = compactTrees; } private void initHelper() { // ! defines a comment to be written out to a log file // & defines a meta comment helper.setCommentDelimiters('[', ']', '\0', '!', '&'); commentWriter = new StringWriter(); helper.setCommentWriter(commentWriter); } // public long findId() { // // } /** * This function returns an integer to specify what the * next block in the file is. The internal variable nextBlock is also set to this * value. This should be overridden to provide support for other blocks. Once * the block is read in, nextBlock is automatically set to UNKNOWN_BLOCK by * findEndBlock. */ public NexusBlock findNextBlock() throws IOException { findToken("BEGIN", true); nextBlockName = helper.readToken(";").toUpperCase(); return findBlockName(nextBlockName); } /** * This function returns an enum class to specify what the * block given by blockName is. */ private NexusBlock findBlockName(String blockName) { try { nextBlock = NexusBlock.valueOf(blockName); } catch (IllegalArgumentException e) { // handle unknown blocks. java 1.5 throws an exception in valueOf nextBlock = null; } if (nextBlock == null) { nextBlock = NexusBlock.UNKNOWN; } return nextBlock; } public String getNextBlockName() { return nextBlockName; } /** * Returns an iterator over a set of elements of type T. * * @return an Iterator. */ public Iterator<Tree> iterator() { return new Iterator<Tree>() { public boolean hasNext() { boolean hasNext = false; try { hasNext = hasTree(); } catch (IOException e) { // deal with errors by stopping the iteration } catch (ImportException e) { // deal with errors by stopping the iteration } return hasNext; } public Tree next() { Tree tree = null; try { tree = importNextTree(); } catch (IOException e) { // deal with errors by stopping the iteration } catch (ImportException e) { // deal with errors by stopping the iteration } if (tree == null) { throw new NoSuchElementException("No more trees in this file"); } return tree; } public void remove() { throw new UnsupportedOperationException("operation is not supported by this Iterator"); } }; } /** * Parses a 'TREES' block. */ public List<Tree> parseTreesBlock(List<Taxon> taxonList) throws ImportException, IOException { return readTreesBlock(taxonList); } // ************************************************************** // TreeImporter IMPLEMENTATION // ************************************************************** private boolean isReadingTreesBlock = false; private List<Taxon> treeTaxonList = null; private Map<String, Taxon> translationList = Collections.emptyMap(); private Tree nextTree = null; private String[] lastToken = new String[1]; /** * return whether another tree is available. */ public boolean hasTree() throws IOException, ImportException { if (!isReadingTreesBlock) { isReadingTreesBlock = startReadingTrees(); translationList = readTranslationList(treeTaxonList, lastToken); } if (!isReadingTreesBlock) { return false; } if (nextTree == null) { nextTree = readNextTree(lastToken); } return (nextTree != null); } /** * import the next tree. * return the tree or null if no more trees are available */ public Tree importNextTree() throws IOException, ImportException { // call hasTree to do the hard work... if (!hasTree()) { isReadingTreesBlock = false; return null; } Tree tree = nextTree; nextTree = null; return tree; } public List<Tree> importTrees() throws IOException, ImportException { isReadingTreesBlock = false; if (!startReadingTrees()) { throw new MissingBlockException("TREES block is missing"); } List<Tree> treesBlock = readTreesBlock(treeTaxonList); helper.closeReader(); nexusId = commentWriter.toString(); commentWriter.close(); return treesBlock; } public boolean startReadingTrees() throws IOException, ImportException { treeTaxonList = null; while (true) { try { NexusBlock block = findNextBlock(); switch (block) { // case TAXA: treeTaxonList = readTaxaBlock(); break; case TREES: return true; // Ignore the block.. default: break; } } catch (EOFException ex) { break; } } return false; } // ************************************************************** // DistanceMatrixImporter IMPLEMENTATION // ************************************************************** // ************************************************************** // PRIVATE Methods // ************************************************************** /** * Finds the end of the current block. */ private void findToken(String query, boolean ignoreCase) throws IOException { String token; boolean found = false; do { token = helper.readToken(); if ((ignoreCase && token.equalsIgnoreCase(query)) || token.equals(query)) { found = true; } } while (!found); } /** * Finds the end of the current block. */ public void findEndBlock() throws IOException { try { String token; do { token = helper.readToken(";"); } while (!token.equalsIgnoreCase("END") && !token.equalsIgnoreCase("ENDBLOCK")); } catch (EOFException e) { // Doesn't matter if the End is missing } nextBlock = NexusBlock.UNKNOWN; } /** * Reads a 'TREES' block. */ private List<Tree> readTreesBlock(List<Taxon> taxonList) throws ImportException, IOException { List<Tree> trees = new ArrayList<Tree>(); double cumWeight = 0.0; String[] localLastToken = new String[1]; translationList = readTranslationList(taxonList, localLastToken); while (true) { SimpleTree tree = readNextTree(localLastToken); if (tree == null) { break; } cumWeight += (Double) tree.getAttribute(tree.getRoot(), TreeUtils.TREE_WEIGHT_ATTRIBUTE); trees.add(tree); } if (trees.size() == 0) { throw new ImportException.BadFormatException("No trees defined in TREES block"); } if (cumWeight > 1.0) { // normalization is required for (Tree tree : trees) { double treeWeight = (Double) tree.getAttribute(tree.getRoot(), TreeUtils.TREE_WEIGHT_ATTRIBUTE); treeWeight /= cumWeight; tree.setAttribute(tree.getRoot(), TreeUtils.TREE_WEIGHT_ATTRIBUTE, treeWeight); } } nextBlock = NexusBlock.UNKNOWN; return trees; } private Map<String, Taxon> readTranslationList(List<Taxon> taxonList, String[] lastToken) throws ImportException, IOException { Map<String, Taxon> localTranslationList = new HashMap<String, Taxon>(); String token = helper.readToken(";"); if (token.equalsIgnoreCase("TRANSLATE")) { do { String token2 = helper.readToken(",;"); if (helper.getLastDelimiter() == ',' || helper.getLastDelimiter() == ';') { if (token2.length() == 0 && (char) helper.getLastDelimiter() == ';') { //assume an extra comma at end of list break; } throw new ImportException.BadFormatException("Missing taxon label in TRANSLATE command of TREES block"); } String token3 = helper.readToken(",;"); if (helper.getLastDelimiter() != ',' && helper.getLastDelimiter() != ';') { throw new ImportException.BadFormatException("Expecting ',' or ';' after taxon label in TRANSLATE command of TREES block"); } Taxon taxon = Taxon.getTaxon(token3); if (taxonList != null && !taxonList.contains(taxon)) { // taxon not found in taxon list... // ...perhaps it is a numerical taxon reference? throw new ImportException.UnknownTaxonException(token3); } localTranslationList.put(token2, taxon); } while (helper.getLastDelimiter() != ';'); token = helper.readToken(";"); } else if (taxonList != null) { for (Taxon taxon : taxonList) { localTranslationList.put(taxon.getName(), taxon); } } lastToken[0] = token; return localTranslationList; } private SimpleTree readNextTree(String[] lastToken) throws ImportException, IOException { try { SimpleTree tree = null; String token = lastToken[0]; double weight; String treeName; boolean isUnrooted = token.equalsIgnoreCase("UTREE"); if (isUnrooted || token.equalsIgnoreCase("TREE")) { if (helper.nextCharacter() == '*') { // Star is used to specify a default tree - ignore it helper.readCharacter(); } final String meta = helper.getLastMetaComment(); if (meta != null) { // Look for the unrooted meta comment [&U] if (meta.equalsIgnoreCase("U")) { isUnrooted = true; } helper.clearLastMetaComment(); } treeName = helper.readToken("=;"); if (helper.getLastDelimiter() != '=') { throw new ImportException.BadFormatException("Missing label for tree'" + treeName + "' or missing '=' in TREE command of TREES block"); } try { if (helper.nextCharacter() != '(') { throw new ImportException.BadFormatException("Missing tree definition in TREE command of TREES block"); } // Save tree comment and attach it later final String comment = helper.getLastMetaComment(); helper.clearLastMetaComment(); Node internalNode = readInternalNode(); tree = new SimpleTree(internalNode); int last = helper.getLastDelimiter(); if (last == ':') { // root length - discard for now /*double rootLength = */ helper.readDouble(";"); last = helper.getLastDelimiter(); } if (last != ';') { throw new ImportException.BadFormatException("Expecting ';' after tree, '" + treeName + "', TREE command of TREES block"); } weight = 1.0; if (comment != null) { // if '[W number]' (MrBayes), set weight attribute // ignore any other comment if (comment.matches("^W\\s+[\\+\\-]?[\\d\\.]+")) { weight = new Double(comment.substring(2)); } } tree.setAttribute(internalNode, TreeUtils.TREE_WEIGHT_ATTRIBUTE, weight); tree.setAttribute(internalNode, TreeUtils.TREE_NAME_ATTRIBUTE, treeName); } catch (EOFException e) { // If we reach EOF we may as well return what we have? return tree; } token = helper.readToken(";"); } else if (token.equalsIgnoreCase("ENDBLOCK") || token.equalsIgnoreCase("END")) { return null; } else { throw new ImportException.BadFormatException("Unknown command '" + token + "' in TREES block"); } //added this to escape readNextTree loop correctly -- AJD lastToken[0] = token; return tree; } catch (EOFException e) { return null; } } /** * Reads a branch in. This could be a node or a tip (calls readNode or readTip * accordingly). It then reads the branch length and SimpleNode that will * point at the new node or tip. */ private Node readBranch() throws IOException, ImportException { Node branch; helper.clearLastMetaComment(); if (helper.nextCharacter() == '(') { // is an internal node branch = readInternalNode(); } else { // is an external node branch = readExternalNode(); } if (helper.getLastDelimiter() == ':') { final double length = helper.readDouble(",():;"); branch.setBranchLength(length); } return branch; } /** * Reads a node in. This could be a polytomy. Calls readBranch on each branch * in the node. * @param tree * @return */ private Node readInternalNode() throws IOException, ImportException { List<Node> children = new ArrayList<Node>(); // read the opening '(' helper.readCharacter(); // read the first child children.add(readBranch()); if (helper.getLastDelimiter() != ',') { //throw new ImportException.BadFormatException("Missing ',' in tree"); } // MK: previously, an internal node must have at least 2 children. // MK: We we now allow trees with a single child so that we can create proper taxonomy // MK: trees with only a single child at a taxonomy level. // read subsequent children while (helper.getLastDelimiter() == ',') { children.add(readBranch()); } // should have had a closing ')' if (helper.getLastDelimiter() != ')') { throw new ImportException.BadFormatException("Missing closing ')' in tree"); } Node node = NodeFactory.createNode(children.toArray(new Node[0])); // find the next delimiter String token = helper.readToken(":(),;").trim(); // if there is a token before the branch length, treat it as a node label // and store it as an attribute of the node... if (token.length() > 0) { node.setIdentifier(new Identifier((String) parseValue(token))); } return node; } /** * Reads an external node in. */ private Node readExternalNode() throws ImportException, IOException { String label = helper.readToken(":(),;"); Taxon taxon; try { taxon = Taxon.getTaxon(label); } catch (IllegalArgumentException e) { throw new ImportException.UnknownTaxonException(e.getMessage()); } if (translationList.size() > 0) { taxon = translationList.get(label); if (taxon == null) { // taxon not found in taxon list... throw new ImportException.UnknownTaxonException("Taxon in tree, '" + label + "' is unknown"); } } try { final Node node = NodeFactory.createNode(new Identifier(taxon.getName())); return node; } catch (IllegalArgumentException e) { throw new ImportException.DuplicateTaxaException(e.getMessage()); } } static void parseMetaCommentPairs(String meta, Attributable item) throws ImportException.BadFormatException { // This regex should match key=value pairs, separated by commas // This can match the following types of meta comment pairs: // value=number, value="string", value={item1, item2, item3} // (label must be quoted if it contains spaces (i.e. "my label"=label) Pattern pattern = Pattern.compile("(\"[^\"]*\"+|[^,=\\s]+)\\s*(=\\s*(\\{[^=}]*\\}|\"[^\"]*\"+|[^,]+))?"); Matcher matcher = pattern.matcher(meta); while (matcher.find()) { String label = matcher.group(1); if (label.charAt(0) == '\"') { label = label.substring(1, label.length() - 1); } if (label == null || label.trim().length() == 0) { throw new ImportException.BadFormatException("Badly formatted attribute: '" + matcher.group() + "'"); } final String value = matcher.group(2); if (value != null && value.trim().length() > 0) { // there is a specified value so try to parse it item.setAttribute(label, parseValue(value.substring(1))); } else { item.setAttribute(label, Boolean.TRUE); } } } /** * This method takes a string and tries to decode it returning the object * that best fits the data. It will recognize command delimited lists enclosed * in {..} and call parseValue() on each element. It will also recognize Boolean, * Integer and Double. If the value starts with a # then it will attempt to decode * the following integer as an RGB colour - see Color.decode(). If nothing else fits * then the value will be returned as a string but trimmed of leading and trailing * white space. * @param value the string * @return the object */ static Object parseValue(String value) { value = value.trim(); if (value.startsWith("{")) { // the value is a list so recursively parse the elements // and return an array String[] elements = value.substring(1, value.length() - 1).split(","); Object[] values = new Object[elements.length]; for (int i = 0; i < elements.length; i++) { values[i] = parseValue(elements[i]); } return values; } if (value.startsWith("#")) { // I am not sure whether this is a good idea but // I am going to assume that a # denotes an RGB colour try { return Color.decode(value.substring(1)); } catch (NumberFormatException nfe1) { // not a colour } } // A string qouted by the nexus exporter and such if (value.startsWith("\"") && value.endsWith("\"")) { return value.subSequence(1, value.length() - 1); } if (value.equalsIgnoreCase("TRUE") || value.equalsIgnoreCase("FALSE")) { return Boolean.valueOf(value); } // Attempt to format the value as an integer try { return Integer.parseInt(value); } catch (NumberFormatException nfe1) { // not an integer } // Attempt to format the value as a double try { return Double.parseDouble(value); } catch (NumberFormatException nfe2) { // not a double } // return the trimmed string return value; } // private stuff private NexusBlock nextBlock = null; private String nextBlockName = null; protected final ImportHelper helper; }