package gov.nih.ncgc.bard.pcparser; // $Id: XmlTwig.java 2952 2009-07-19 07:34:05Z nguyenda $ // 06.19.07 import java.io.ByteArrayInputStream; import java.io.InputStream; import java.io.Reader; import java.util.Vector; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; /* * this class is inspired by the Perl::Twig module */ public class XmlTwig { private Document m_doc; public XmlTwig (Reader reader) { this (new InputSource (reader)); } public XmlTwig (InputSource is) { try { DocumentBuilder db = DocumentBuilderFactory .newInstance().newDocumentBuilder(); m_doc = db.parse(is); } catch (Exception ex) { ex.printStackTrace(); throw new IllegalArgumentException ("bogus xml stream", ex); } } public XmlTwig (InputStream is) { try { DocumentBuilder db = DocumentBuilderFactory .newInstance().newDocumentBuilder(); m_doc = db.parse(is); } catch (Exception ex) { ex.printStackTrace(); throw new IllegalArgumentException ("bogus xml stream", ex); } } public XmlTwig (Document doc) { m_doc = doc; } public Document getDocument () { return m_doc; } static protected void walkElement (Element match[], Element node, String names[], int depth) { if (depth < names.length) { /* System.err.println (depth + ": " + node.getTagName() + " " + names[depth]); */ if (names[depth].equals(node.getTagName())) { match[depth] = node; /* for (Element e : match) { if (e != null) { System.err.print("->" + e.getTagName()); } } System.out.println(); */ NodeList childs = node.getChildNodes(); for (int i = 0; i < childs.getLength(); ++i) { Node n = childs.item(i); if (n.getNodeType() == Node.ELEMENT_NODE) { walkElement (match, (Element)n, names, depth + 1); } } } } } static protected void getElementValue (String values[], Element node) { NodeList childs = node.getChildNodes(); for (int i = 0; i < childs.getLength(); ++i) { Node n = childs.item(i); short type = n.getNodeType(); switch (type) { case Node.TEXT_NODE: values[0] = n.getNodeValue(); break; case Node.ELEMENT_NODE: getElementValue (values, (Element)n); break; } } } public static String getElementValue (Element node, String path) { Element n = getElement (node, path); String value = null; if (n != null) { String values[] = new String[1]; getElementValue (values, n); value = values[0]; } return value; } public String getElementValue (String path) { return getElementValue (m_doc.getDocumentElement(), path); } public String getElementAttrValue (String path, String attr) { Element elm = getElement (path); String value = null; if (elm != null) { value = elm.getAttribute(attr); } return value; } public static Element getElement (Element node, String path) { String names[] = path.split("/"); Element match[] = new Element[names.length]; //System.err.println("## node: " + node.getTagName() + "... " + path); walkElement (match, node, names, 0); Element elm = match[match.length-1]; if (elm != null && elm.getTagName().equals(names[names.length-1])) { //System.err.println("## MATCHED!!!"); return elm; } //System.err.println("## NOT MATCHED!!!"); return null; } public Element getElement (String path) { return getElement (m_doc.getDocumentElement(), path); } public boolean hasElement (String path) { return getElement (path) != null; } public boolean hasElement (Element node, String path) { return getElement (node, path) != null; } public static String getElementsAsText (Element root, String tag, String separator) { StringBuffer sb = new StringBuffer (); NodeList nodes = root.getElementsByTagName(tag); String[] value = new String[1]; for (int i = 0; i < nodes.getLength(); ++i) { getElementValue (value, (Element)nodes.item(i)); sb.append(value[0] == null ? "" : value[0]); if (i+1 < nodes.getLength()) { sb.append(separator); } value[0] = null; } return sb.toString(); } public static String[] getElementsAsArray (Element root, String tag) { NodeList nodes = root.getElementsByTagName(tag); Vector<String> values = new Vector<String>(); String[] v = new String[1]; for (int i = 0; i < nodes.getLength(); ++i) { getElementValue (v, (Element)nodes.item(i)); if (v[0] != null) { values.add(v[0]); } } return values.toArray(new String[0]); } public String getElementsAsText (String tag, String separator) { return getElementsAsText (m_doc.getDocumentElement(), tag, separator); } public static void main (String argv[]) throws Exception { String xml1 = "<?xml version=\"1.0\"?>" +"<!DOCTYPE PCT-Data PUBLIC \"-//NCBI//NCBI PCTools/EN\" \"http://pubchem.ncbi.nlm.nih.gov/pug/pug.dtd\">" +"<PCT-Data>" +" <PCT-Data_output>" +" <PCT-OutputData>" +" <PCT-OutputData_status>" +" <PCT-Status-Message>" +" <PCT-Status-Message_status>" +" <PCT-Status value=\"success\"/>" +" </PCT-Status-Message_status>" +" </PCT-Status-Message>" +" </PCT-OutputData_status>" +" <PCT-OutputData_output>" +" <PCT-OutputData_output_download-url>" +" <PCT-Download-URL>" +" <PCT-Download-URL_url>ftp://ftp-private.ncbi.nlm.nih.gov/pubchem/.fetch/623889957832777372.sdf</PCT-Download-URL_url>" +" </PCT-Download-URL>" +" </PCT-OutputData_output_download-url>" +" </PCT-OutputData_output>" +" </PCT-OutputData>" +" </PCT-Data_output>" +"</PCT-Data>"; XmlTwig twig = new XmlTwig (new ByteArrayInputStream (xml1.getBytes())); String path = "PCT-Data/PCT-Data_output/PCT-OutputData/PCT-OutputData_status/PCT-Status-Message/PCT-Status-Message_status/PCT-Status"; System.out.println (path + "=" + twig.getElementAttrValue(path, "value")); path = "PCT-Data/PCT-Data_output/PCT-OutputData/PCT-OutputData_output/PCT-OutputData_output_download-url/PCT-Download-URL/PCT-Download-URL_url"; System.out.println(path + "=" + twig.getElementValue(path)); path = "PCT-Data/PCT-Data_output/PCT-OutputData/PCT-OutputData_status/PCT-Status-Message"; Element elm = twig.getElement(path); System.out.println(path + ": " + (elm != null ? "yes" : "no")); if (elm != null) { Element elm2 = XmlTwig.getElement(elm, "PCT-Status-Message/PCT-Status-Message_status/PCT-Status"); System.out.println("** " + elm2 + " " + elm2.getAttribute("value")); } } }