/* $Id: XMLDoc.java 988245 2010-08-23 18:39:35Z kwright $ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.core.common;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.io.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.w3c.dom.*;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
import org.apache.manifoldcf.core.system.Logging;
/** Thin wrapper around a DOM {@link Document} that offers untyped, xpath-like
* access to elements plus a handful of helpers for building and serializing
* a document. Nodes are handed out and accepted as plain {@code Object}s;
* they are DOM {@link Node} instances underneath.
*/
public class XMLDoc
{
  public static final String _rcsid = "@(#)$Id: XMLDoc.java 988245 2010-08-23 18:39:35Z kwright $";

  /** Path term that matches any element name. */
  private static final String _wildchar = "*";
  /** Separator between the terms of a path. */
  private static final String _slash = "/";
  /** Initial size, in bytes, of the buffer used by {@link #toByteArray()}. */
  private static final int _blocksiz = 1024;

  /** Parsed xml doc tree. */
  private Document _doc = null;

  /** Return the underlying document; may be null.
  * @return document object
  */
  protected Object getDocument()
  {
    return _doc;
  }

  /** Replace the underlying document.
  * @param d the new document; must be a DOM {@link Document} (or null)
  */
  protected void setDocument(Object d)
  {
    _doc = (Document)d;
  }

  /** Collect the elements addressed by an xpath-like path.
  * See {@link #processPath(ArrayList, String, Object)} for the path syntax.
  * @param path path of the form root/node/node...
  * @param o node to start from, or null to start from the document itself
  * @return a new list holding the matching elements (possibly empty)
  */
  public ArrayList processPath(String path, Object o)
  {
    ArrayList<Object> result = new ArrayList<>();
    processPath(result, path, o);
    return result;
  }

  /** Collect the elements addressed by an xpath-like path into a caller
  * supplied list.
  *
  * The path has the form root/node/node... and the elements found at the
  * bottom-most term are returned. Each term is an element name (matched
  * case-insensitively, several names may be comma separated), the wildcard
  * "*" meaning any element, or a term carrying a qualifier such as
  * "user name=Fred", meaning the user elements whose attribute name equals
  * Fred. A qualifier without an element name ("name=Fred") or with a
  * wildcard element name applies to any element. A path ending in "/" is
  * treated as if it ended in "/*".
  *
  * NOTE: the evaluation is not recursive. At every term but the last only
  * the FIRST matching element is descended into, so for THIS/[wildchar]/THAT
  * only the THAT children of the first child of THIS are returned.
  *
  * @param returnList list the matching elements are appended to
  * @param path xpath-like path
  * @param currentRoot node to start from, or null to start from the document
  */
  @SuppressWarnings("unchecked") // returnList is a raw ArrayList by public contract
  public void processPath(ArrayList returnList, String path,
    Object currentRoot)
  {
    if (path.endsWith(_slash))
    {
      path += _wildchar;
    }

    Object element = currentRoot;
    boolean qualified = false; // true once any term carried a qualifier
    String attribute = null;
    String value = null;
    ArrayList<Object> working = new ArrayList<>();

    StringTokenizer tokenizer = new StringTokenizer(path, _slash, false);
    int pathDepth = tokenizer.countTokens();
    int depth = 0;
    while (tokenizer.hasMoreTokens())
    {
      depth++;
      String term = tokenizer.nextToken().trim();
      if (term.length() == 0)
      {
        // Blank term (whitespace only); nothing to evaluate
        continue;
      }

      // The qualifier only ever applies to the term it was written on
      attribute = null;
      value = null;
      String elementName = term;

      int eq = term.indexOf('=');
      if (eq > -1)
      {
        qualified = true;
        String qualifier = term;
        int space = term.indexOf(' ');
        if (space > -1 && space < eq)
        {
          // Well known form: "elementName attribute=value"
          elementName = term.substring(0, space);
          qualifier = term.substring(space).trim();
          eq = qualifier.indexOf('=');
        }
        else
        {
          // No element name in front of the qualifier: match any element
          elementName = null;
        }
        attribute = qualifier.substring(0, eq);
        value = qualifier.substring(eq + 1);
      }
      if (_wildchar.equals(elementName))
      {
        elementName = null; // find anything
      }

      ArrayList<Object> found = getElements(element, elementName);
      element = null; // forget path to this point
      if (depth == pathDepth)
      {
        // Last term: keep every candidate, they are pruned below
        working.addAll(found);
      }
      else
      {
        // Intermediate term: descend into the first match only
        int hit = searchArrayForAttribute(found, 0, attribute, value);
        if (hit != -1)
        {
          element = found.get(hit);
        }
      }
      if (element == null)
      {
        break;
      }
    }

    if (qualified)
    {
      // Prune out the candidates that do not satisfy the qualifier of the
      // last term. The search resumes just past the previous hit so that no
      // element is reported more than once.
      int from = 0;
      while (from < working.size())
      {
        int hit = searchArrayForAttribute(working, from, attribute, value);
        if (hit < 0)
        {
          // no more matching nodes
          break;
        }
        returnList.add(working.get(hit));
        from = hit + 1;
      }
    }
    else
    {
      // It's everything, in simple XML element (node) form.
      returnList.addAll(working);
    }
  }

  /** Scan a list of elements, starting at a given index, for the first one
  * carrying the given attribute value (part of the qualifier matching of
  * the xpath-like element specification).
  *
  * @param l list of elements found
  * @param i starting index
  * @param attribute attribute name to look at; when null or empty every
  *        element matches
  * @param value attribute value to match; a null value matches nothing
  * @return index of the first match at or after i, or -1 if there is none
  */
  protected int searchArrayForAttribute(ArrayList l, int i, String attribute, String value)
  {
    boolean anyElement = (attribute == null || attribute.length() == 0);
    for (int index = i; index < l.size(); index++)
    {
      Object element = l.get(index);
      if (anyElement || (value != null && value.equals(getValue(element, attribute))))
      {
        return index;
      }
    }
    return -1;
  }

  /** Serialize the document object to a string.
  * The serialized bytes are decoded as UTF-8.
  * @return xml raw text
  */
  public String getXML()
    throws ManifoldCFException
  {
    return new String(toByteArray(), StandardCharsets.UTF_8);
  }

  /** Get the XML text without the leading xml declaration.
  * If the serialized text does not start with a declaration it is returned
  * unchanged.
  * @return xml raw text following the declaration
  */
  public String getXMLNoEntityPreamble()
    throws ManifoldCFException
  {
    String initial = getXML();
    if (initial.startsWith("<?xml"))
    {
      int end = initial.indexOf("?>");
      if (end != -1)
      {
        return initial.substring(end + 2);
      }
    }
    return initial;
  }

  /** Convert the response for transmit
  * @return xml in byte array
  */
  public byte[] toByteArray()
    throws ManifoldCFException
  {
    ByteArrayOutputStream os = new ByteArrayOutputStream(_blocksiz);
    dumpOutput(os);
    return os.toByteArray();
  }

  /** Creates the empty doc
  */
  public XMLDoc()
    throws ManifoldCFException
  {
    try
    {
      _doc = newBuilder().newDocument();
    }
    catch (Exception e)
    {
      throw new ManifoldCFException("Error setting up parser: "+e.getMessage(),e);
    }
  }

  /** Construct a new document tree from a string form of
  * an xml document
  * @param data xml to parse
  */
  public XMLDoc(String data)
    throws ManifoldCFException
  {
    _doc = init(new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)));
  }

  /** Construct a new document tree from a StringBuilder form of
  * an xml document
  * @param data xml to parse
  */
  public XMLDoc(StringBuilder data)
    throws ManifoldCFException
  {
    _doc = init(new ByteArrayInputStream(data.toString().getBytes(StandardCharsets.UTF_8)));
  }

  /** Build a document object tree from an input
  * stream
  * @param is InputStream of xml to parse
  */
  public XMLDoc(InputStream is)
    throws ManifoldCFException
  {
    _doc = init(is);
  }

  /** Construct a document from all the element children of an existing
  * element object from another document.
  * @param oldDoc document to copy from when parent is null
  * @param parent node whose element children are copied, or null to copy
  *        the element children of oldDoc itself
  */
  public XMLDoc(XMLDoc oldDoc, Object parent)
    throws ManifoldCFException
  {
    try
    {
      _doc = newBuilder().newDocument();
      // Now, loop through the document or element's children and transfer them
      NodeList nodes = (parent == null)
        ? oldDoc._doc.getChildNodes()
        : ((Node)parent).getChildNodes();
      int sz = nodes.getLength();
      for (int index = 0; index < sz; index++)
      {
        Node node = nodes.item(index);
        if (node.getNodeType() == Node.ELEMENT_NODE)
        {
          Node copy = duplicateNode(node);
          if (copy != null)
          {
            _doc.appendChild(copy);
          }
        }
      }
    }
    catch (Exception e)
    {
      throw new ManifoldCFException("Error setting up parser: "+e.getMessage(),e);
    }
  }

  /** Create a non-validating document builder whose entity resolver maps
  * every reference to a blank document.
  * @return the configured builder
  */
  private static DocumentBuilder newBuilder()
    throws ParserConfigurationException
  {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(false);
    DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setEntityResolver(new MyEntityResolver());
    return builder;
  }

  /** Parse a stream into a document.
  * On failure, and only when debug logging is enabled, the beginning of the
  * stream is logged as context before the exception is thrown.
  * @param is InputStream of xml to parse
  * @return the parsed document
  */
  private Document init(InputStream is)
    throws ManifoldCFException
  {
    try
    {
      return newBuilder().parse(is);
    }
    catch (Exception e)
    {
      if (Logging.misc.isDebugEnabled())
      {
        Logging.misc.debug(readErrorContext(is), e);
      }
      throw new ManifoldCFException("XML parsing error: "+e.getMessage(),e);
    }
  }

  /** Best-effort read of the start of a stream, for diagnostics only.
  * There are two problems: the encoding is not known and memory is not
  * infinite. So at most one buffer is read and it is presumed to be utf-8;
  * if that is wrong the text comes out as garbage, which can't be helped.
  * @param is the stream that failed to parse
  * @return the text read, or an empty string if nothing could be read
  */
  private static String readErrorContext(InputStream is)
  {
    StringBuilder context = new StringBuilder();
    try
    {
      // This won't work for all streams, but we catch the exception
      is.reset();
      byte[] buf = new byte[65536];
      int len = is.read(buf);
      if (len != -1)
      {
        context.append(new String(buf, 0, len, StandardCharsets.UTF_8));
        if (len == buf.length)
        {
          context.append("...");
        }
      }
    }
    catch (Exception ignored)
    {
      // Deliberately ignored: the context is optional diagnostic output
    }
    return context.toString();
  }

  /** Return the value of a named attribute
  * @param elo Object to ask; must be a DOM Element
  * @param a String attribute to find
  * @return String value, as reported by {@link Element#getAttribute(String)}
  */
  public final String getValue(Object elo, String a)
  {
    return ((Element)elo).getAttribute(a);
  }

  /** Return element name.
  * May return null if node not
  * of type Element
  * @param el Object to ask
  * @return String value
  */
  public final String getNodeName(Object el)
  {
    Node node = (Node)el;
    if (node.getNodeType() == Node.ELEMENT_NODE)
    {
      return node.getNodeName();
    }
    return null;
  }

  /** Get TEXT element value as single string.
  * The text and CDATA children of the element are each trimmed and then
  * concatenated; other children are ignored.
  * @param obj Element to grab data
  * @return TXT collapsed for this element
  * ie [tag]Julie[/tag] returns "Julie"
  */
  public final String getData(Object obj)
  {
    StringBuilder data = new StringBuilder();
    NodeList children = ((Node)obj).getChildNodes();
    // expect just 1
    int sz = children.getLength();
    for (int j = 0; j < sz; j++)
    {
      Node child = children.item(j);
      short type = child.getNodeType();
      if (type == Node.TEXT_NODE)
      {
        Text sec = (Text)child;
        sec.normalize();
        data.append(sec.getData().trim());
      }
      else if (type == Node.CDATA_SECTION_NODE)
      {
        data.append(((CDATASection)child).getData().trim());
      }
    }
    return data.toString();
  }

  /** Return root node
  * @return untyped object for later use
  */
  public Object getRoot()
  {
    return getRoot(_doc);
  }

  /** Return root node.
  * This is the document element, even when comments, a DOCTYPE or
  * processing instructions precede it in the document.
  * @param obj Object document, might not be 'this'
  * @return untyped object for later use; null if the document has no
  *         root element
  */
  public Object getRoot(Object obj)
  {
    return ((Document)obj).getDocumentElement();
  }

  /** Return all element nodes belonging to this node;
  * Supplying null means the document is root.
  * @param n Object to ask
  * @return ArrayList of objects
  */
  private ArrayList<Object> getElements(Object n)
  {
    return getElements(n, null);
  }

  /** Extract the attribute names from the given
  * node. If 'n' is not a node, or is a node that carries no attribute map,
  * no attributes will be returned but the array will not be null
  * @param n Object to ask
  * @return ArrayList of attribute names
  */
  public final ArrayList getAttributes(Object n)
  {
    ArrayList<String> atts = new ArrayList<>();
    if (!(n instanceof Node))
    {
      return atts;
    }
    // Only elements have an attribute map; other node types report null
    NamedNodeMap map = ((Node)n).getAttributes();
    if (map == null)
    {
      return atts;
    }
    for (int i = 0; i < map.getLength(); i++)
    {
      Attr att = (Attr)map.item(i);
      atts.add(att.getName());
    }
    return atts;
  }

  /** Return the first object to match tagname
  * @param parent Object
  * @param tagname String nodename
  * @return null or found element (Object)
  */
  public Object getElement(Object parent, String tagname)
  {
    ArrayList<Object> matches = getElements(parent, tagname);
    return matches.isEmpty() ? null : matches.get(0);
  }

  /**
  * Get the child elements of this element by name
  * @param parent Object element, or null for the children of the document
  * @param tagname String matching elements (tag name, compared ignoring
  *        case), comma separated ok; null matches every element
  * @return ArrayList of nodes
  */
  private ArrayList<Object> getElements(Object parent, String tagname)
  {
    NodeList nodes = (parent == null)
      ? _doc.getChildNodes()
      : ((Node)parent).getChildNodes();

    // Supplied tagname(s)?
    List<String> tags = new ArrayList<>();
    if (tagname != null)
    {
      StringTokenizer st = new StringTokenizer(tagname, ",");
      while (st.hasMoreTokens())
      {
        tags.add(st.nextToken());
      }
    }

    // Process found elements
    ArrayList<Object> list = new ArrayList<>();
    int sz = nodes.getLength();
    for (int index = 0; index < sz; index++)
    {
      Node node = nodes.item(index);
      if (node.getNodeType() != Node.ELEMENT_NODE)
      {
        continue;
      }
      if (tags.isEmpty())
      {
        // Add all
        list.add(node);
        continue;
      }
      // Add matches only
      String theTag = node.getNodeName();
      for (String tag : tags)
      {
        if (theTag.equalsIgnoreCase(tag))
        {
          list.add(node);
          break; // done, one match only possible
        }
      }
    }
    return list;
  }

  /*************************************************************************
  * Document construction
  *************************************************************************
  */

  /** Create an element
  * @param who Object parent Node, or null to append to the document itself
  * @param ename String element name
  * @return Object element
  */
  public Object createElement(Object who, String ename)
  {
    Element element = _doc.createElement(ename);
    if (who == null)
    {
      _doc.appendChild(element);
    }
    else
    {
      ((Element)who).appendChild(element);
    }
    return element;
  }

  /** Add the children of another document's node as the children of this node.
  * Children that cannot be copied (node types {@link #duplicateNode(Node)}
  * does not handle) are skipped. When appending to the document itself,
  * text and CDATA children are skipped as well because a DOM document
  * cannot hold them.
  * @param where element of this document to append to, or null to append
  *        to the document itself
  * @param oldDoc document to copy from when parent is null
  * @param parent node whose children are copied, or null to copy the
  *        children of oldDoc itself
  */
  public void addDocumentElement(Object where, XMLDoc oldDoc, Object parent)
  {
    // Now, loop through the document or element's children and transfer them
    NodeList nodes = (parent == null)
      ? oldDoc._doc.getChildNodes()
      : ((Node)parent).getChildNodes();
    int sz = nodes.getLength();
    for (int index = 0; index < sz; index++)
    {
      Node node = nodes.item(index);
      if (where == null)
      {
        short type = node.getNodeType();
        if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE)
        {
          continue;
        }
      }
      Node copy = duplicateNode(node);
      if (copy == null)
      {
        continue;
      }
      if (where == null)
      {
        _doc.appendChild(copy);
      }
      else
      {
        ((Element)where).appendChild(copy);
      }
    }
  }

  /** Set an attribute on an element
  * @param e Object element to modify
  * @param sName String attribute name
  * @param sValue String attribute value
  */
  public void setAttribute(Object e, String sName, String sValue)
  {
    ((Element)e).setAttribute(sName, sValue);
  }

  /** Create a free-form data value (vs attribute value=)
  * @param who Object element to append the text to. Passing null appends
  *        to the document itself; NOTE the DOM does not allow text directly
  *        under a document, so expect the underlying implementation to
  *        reject that.
  * @param data String text to add as cdata/text
  * @return the text node created
  */
  public Object createText(Object who, String data)
  {
    Text text = _doc.createTextNode(data);
    if (who == null)
    {
      _doc.appendChild(text);
    }
    else
    {
      ((Element)who).appendChild(text);
    }
    return text;
  }

  /** Make a (deep) copy of a node, owned by this document.
  * Elements (with their attributes and children), text, CDATA sections and
  * comments are copied. Children of an element that cannot be copied are
  * left out of the copy.
  *@param node is the node object
  *@return the local copy, or null if the node is of any other type.
  */
  protected Node duplicateNode(Node node)
  {
    switch (node.getNodeType())
    {
    case Node.ELEMENT_NODE:
      Element copy = _doc.createElement(node.getNodeName());
      // Copy attributes
      NamedNodeMap attributes = node.getAttributes();
      for (int i = 0; i < attributes.getLength(); i++)
      {
        Attr attribute = (Attr)attributes.item(i);
        copy.setAttribute(attribute.getName(),attribute.getValue());
      }
      // Copy children
      NodeList children = node.getChildNodes();
      for (int i = 0; i < children.getLength(); i++)
      {
        Node childCopy = duplicateNode(children.item(i));
        if (childCopy != null)
        {
          copy.appendChild(childCopy);
        }
      }
      return copy;
    case Node.TEXT_NODE:
      return _doc.createTextNode(((Text)node).getData());
    case Node.CDATA_SECTION_NODE:
      return _doc.createCDATASection(((CDATASection)node).getNodeValue());
    case Node.COMMENT_NODE:
      return _doc.createComment(((Comment)node).getNodeValue());
    default:
      // Unknown node type: nothing to copy
      return null;
    }
  }

  /** Transform the document for serialization, writing it to a stream.
  * @param os stream to write the serialized document to
  */
  private void dumpOutput(OutputStream os)
    throws ManifoldCFException
  {
    try
    {
      Transformer transformer = TransformerFactory.newInstance().newTransformer();
      transformer.transform(new DOMSource(_doc), new StreamResult(os));
    }
    catch (Exception e)
    {
      throw new ManifoldCFException("Error dumping output: "+e.getMessage(),e);
    }
  }

  /** Entity resolver that never fetches anything: every reference is
  * answered with a blank document.
  */
  protected static class MyEntityResolver implements org.xml.sax.EntityResolver
  {
    @Override
    public org.xml.sax.InputSource resolveEntity(java.lang.String publicId, java.lang.String systemId)
      throws SAXException, java.io.IOException
    {
      // ALL references resolve to blank documents
      return new org.xml.sax.InputSource(new ByteArrayInputStream("<?xml version='1.0' encoding='UTF-8'?>".getBytes(StandardCharsets.UTF_8)));
    }
  }
}