package org.phylowidget.tree; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.phylowidget.PhyloTree; import org.phylowidget.tree.TreeIO.TreeOutputConfig; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; public class NexmlIO { static void addOtuToMap(HashMap<String, String> map, Element otu) { map.put(otu.getAttribute("id"), otu.getAttribute("label")); } static HashMap<String, String> createOtusMap(Element otus) { HashMap<String, String> otuMap = new HashMap<String, String>(); NodeList otuEls = otus.getElementsByTagName("otu"); for (int i = 0; i < otuEls.getLength(); i++) { Element el = (Element) otuEls.item(i); addOtuToMap(otuMap, el); } return otuMap; } public static void main(String[] args) { try { URL url = new URL("http://www.nexml.org/nexml/examples/tolweb.xml"); InputStream in = url.openStream(); NexmlIO io = new NexmlIO(PhyloTree.class); RootedTree tree = io.parseStream(in); System.out.println(tree.getNewick()); } catch (Exception e) { e.printStackTrace(); } } HashMap<String, Class> classFactory; public NexmlIO(Class treeClass) { classFactory = new HashMap<String, Class>(); classFactory.put("tree", treeClass); } private void addOtusToMap(HashMap<String, HashMap<String, String>> map, Element otus) { HashMap<String, String> otuMap = createOtusMap(otus); map.put(otus.getAttribute("id"), otuMap); } private void createEdge(Element el, HashMap<String, Object> nodeMap, RootedTree tree) { Object source = nodeMap.get(el.getAttribute("source")); Object target = nodeMap.get(el.getAttribute("target")); if (source == null || target == null) { System.err.println("Source or target is null! " + el.toString()); System.exit(0); } Object edge = tree.addEdge(source, target); String length = el.getAttribute("length"); if (length.length() > 0) { Double dblLength = Double.parseDouble(length); tree.setEdgeWeight(edge, dblLength); } } private Object createNode(Element el, RootedTree tree, HashMap<String, String> otusMap) { // Object newNode = objFromElement(el); Object newNode = tree.createVertex(); DefaultVertex node = (DefaultVertex) newNode; String nodeLabel = el.getAttribute("label"); if (nodeLabel.length() > 0) { nodeLabel = nodeLabel.replaceAll("&","&"); node.setLabel(nodeLabel); } String label = otusMap.get(el.getAttribute("otu")); if (label != null && label.length() > 0) { label = label.replaceAll("&","&"); node.setLabel(label); } // Load annotations from any contained dict elements. List<Element> dicts = getSubElementsByName(el,"dict"); for (Element dict : dicts) { List<Element> keyvals = getSubElementsByName(dict,"*"); for (Element keyval : keyvals) { if (node instanceof PhyloNode) { PhyloNode pn = (PhyloNode) node; pn.setAnnotation(keyval.getNodeName(), keyval.getTextContent()); } } } return node; } private List<Element> getSubElementsByName(Element el,String name) { NodeList subNodes = el.getElementsByTagName(name); ArrayList<Element> els = new ArrayList<Element>(subNodes.getLength()); for (int i=0; i < subNodes.getLength(); i++) { els.add((Element) subNodes.item(i)); } return els; } private Object objFromElement(Element elt) { String tagName = elt.getTagName(); Object obj = null; try { obj = classFactory.get(tagName).newInstance(); } catch (Exception e) { e.printStackTrace(); } // TODO: Automatically store the labels, IDs, etc. return obj; } public RootedTree parseDocument(Document xmlDoc) { HashMap<String, HashMap<String, String>> otuMaps = processOtus(xmlDoc.getElementsByTagName("otus")); HashMap<String, Object> trees = processTrees(xmlDoc.getElementsByTagName("trees"), otuMaps); // Todo: deal with multiple trees somehow. Iterator<String> it = trees.keySet().iterator(); if (it.hasNext()) { String key = it.next(); return (RootedTree) trees.get(key); } else { return null; } } public RootedTree parseReader(Reader in) throws Exception { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); return parseDocument(db.parse(new InputSource(in))); } public RootedTree parseStream(InputStream in) throws Exception { return parseReader(new InputStreamReader(in)); } public RootedTree parseString(String s) throws Exception { return parseReader(new StringReader(s)); } private HashMap<String, HashMap<String, String>> processOtus(NodeList otus) { HashMap<String, HashMap<String, String>> map = new HashMap<String, HashMap<String, String>>(); for (int i = 0; i < otus.getLength(); i++) { Element el = (Element) otus.item(i); addOtusToMap(map, el); } return map; } private Object processTree(Element treeEl, HashMap<String, String> otuMap) { Object newTree = objFromElement(treeEl); RootedTree tree = (RootedTree) newTree; // Go through the nodes and add. HashMap<String, Object> nodeMap = new HashMap<String, Object>(); // Node ID to node object map. NodeList nl = treeEl.getElementsByTagName("node"); for (int i = 0; i < nl.getLength(); i++) { Element el = (Element) nl.item(i); Object node = createNode(el, tree, otuMap); nodeMap.put(el.getAttribute("id"), node); tree.addVertex(node); if (el.getAttribute("root").length() > 0 && el.getAttribute("root").equals("true")) { tree.setRoot((DefaultVertex) node); } else if (tree.getRoot() == null) tree.setRoot((DefaultVertex)node); } // Go through edges and create. nl = treeEl.getElementsByTagName("edge"); for (int i = 0; i < nl.getLength(); i++) { Element el = (Element) nl.item(i); createEdge(el, nodeMap, tree); } // Fix up the sorting. tree.fixSortingByAnnotation("first"); return tree; } private HashMap<String, Object> processTrees(NodeList treesEls, HashMap<String, HashMap<String, String>> otusMap) { HashMap<String, Object> treesMap = new HashMap<String, Object>(); for (int i = 0; i < treesEls.getLength(); i++) { Element el = (Element) treesEls.item(i); HashMap<String, String> otuMap = otusMap.get(el.getAttribute("otus")); NodeList trees = el.getElementsByTagName("tree"); for (int j = 0; j < trees.getLength(); j++) { Element treeEl = (Element) trees.item(j); treesMap.put(treeEl.getAttribute("id"), processTree(treeEl, otuMap)); } } return treesMap; } public static String createNeXMLString(RootedTree tree) { TreeOutputConfig config = new TreeOutputConfig(); config.outputNHX = true; return createNeXMLString(tree, config); } private static String createNeXMLString(RootedTree tree, TreeOutputConfig config) { int nodeId = 0; int edgeId = 0; int otuId = 0; int otusId = 0; int treeId = 0; int treesId = 0; int dictId = 0; int globalDummyId = 0; StringBuffer sb = new StringBuffer(); String boilerplate = "<nex:nexml version=\"0.8\" " + "xmlns=\"http://www.nexml.org/1.0\" " + "xmlns:nex=\"http://www.nexml.org/1.0\" " + "xmlns:xml=\"http://www.w3.org/XML/1998/namespace\" "+ "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "+ "xsi:schemaLocation=\"http://www.nexml.org/1.0 http://www.nexml.org/1.0/nexml.xsd\" " + " >"; // addLine(sb, "<?xml version='1.0' encoding='ISO-8859-1'?>", 0); addLine( sb, boilerplate, 0); addLine(sb, "<otus id=\"otus1\" label=\"onlyOtus\">", 1); List leaves = tree.getAllLeaves(); HashMap<Object, String> leafToOtuId = new HashMap<Object, String>(); for (Object leaf : leaves) { int myId = otuId++; String otuIdS = "otu" + myId; String label = tree.getLabel((DefaultVertex) leaf); label = escapeXml(label); addLine(sb, "<otu id=" + qw(otuIdS) + " label=" + qw(label) + "/>", 2); leafToOtuId.put(leaf, otuIdS); } addLine(sb, "</otus>", 1); addLine(sb, "<trees id=\"trees1\" otus=\"otus1\" label=\"onlyTrees\">", 1); addLine(sb, "<tree id=\"tree1\" label=\"onlyTree\" xsi:type=\"nex:FloatTree\" >", 2); List<DefaultVertex> nodes = tree.getAllNodes(); HashMap<Object,String> nodeToId = new HashMap<Object,String>(); for (DefaultVertex o : nodes) { nodeId++; String nodeIdString = "node"+nodeId; String labelAndRoot = ""; if (tree.getLabel(o).length() > 0) labelAndRoot += " label="+qw(tree.getLabel(o))+" "; if (tree.isRoot(o)) labelAndRoot += " root="+qw("true")+" "; if (leafToOtuId.containsKey(o)) { labelAndRoot += " otu="+qw(leafToOtuId.get(o))+" "; } labelAndRoot = escapeXml(labelAndRoot); HashMap<String,String> anns = null; if (o instanceof PhyloNode) { PhyloNode pn = (PhyloNode) o; PhyloNode parent = (PhyloNode) tree.getParentOf(pn); if (parent != null) { if (pn == tree.getFirstChild(parent)) { pn.setAnnotation("first", "y"); } else { pn.clearAnnotation("first"); } } anns = pn.getAnnotations(); if (anns != null && anns.keySet().size() == 0) { anns = null; } } if (anns != null) { addLine(sb, "<node id="+qw(nodeIdString)+" "+labelAndRoot+">",3); dictId++; String dictIdString = "dict"+dictId; addLine(sb,"<dict id="+qw(dictIdString)+">",4); for (String s : anns.keySet()) { globalDummyId++; String dummyString = qw("dummy"+globalDummyId); addLine(sb,"<"+s+" id="+dummyString+">"+escapeXml(anns.get(s))+"</"+s+">",5); } addLine(sb,"</dict>",4); addLine(sb,"</node>",3); } else { addLine(sb, "<node id="+qw(nodeIdString)+" "+labelAndRoot+"/>",3); } nodeToId.put(o, nodeIdString); } // Go through and add edges. for (DefaultVertex o : nodes) { if (tree.getParentOf(o) != null) { edgeId++; String edgeIdString = "edge"+edgeId; String sourceAndTarget = ""; String targetId = nodeToId.get(o); String sourceId = nodeToId.get(tree.getParentOf(o)); sourceAndTarget = " source="+qw(sourceId)+" target="+qw(targetId)+" "; String length = " length="+qw(tree.getBranchLength(o)+"")+" "; addLine(sb, "<edge id="+qw(edgeIdString)+" "+sourceAndTarget+length+"/>",3); } } addLine(sb, "</tree>", 2); addLine(sb, "</trees>", 1); addLine(sb, "</nex:nexml>", 0); return sb.toString(); } private static String escapeXml(String s) { s = s.replaceAll("<", "<"); s = s.replaceAll(">", ">"); s = s.replaceAll("&", "&"); return s; } private static String qw(String s) { return "\"" + s + "\""; } private static void addLine(StringBuffer sb, String line, int depth) { String tab = ""; for (int i = 0; i < depth; i++) { tab += " "; } sb.append(tab + line + "\n"); } }