/*******************************************************************************
* Copyright (c) 2008, 2009 Bug Labs, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* - Neither the name of Bug Labs, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
package com.buglabs.util.xml;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Stack;
/**
* <code>XMLParser</code> is a highly-simplified XML DOM parser. It does not
* support namespaces.
*/
public class XmlParser {
private static final int[] cdata_start = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' };
private static final int[] cdata_end = { ']', ']', '>' };
private Reader reader;
private Stack<XmlNode> elements;
private XmlNode currentElement;
/**
* If set to true, namespace prefixes are stripped from node and attribute
* names. Otherwise they are simply part of the names.
*/
private boolean ignoreNamespaces;
public XmlParser() {
elements = new Stack<XmlNode>();
currentElement = null;
ignoreNamespaces = false;
}
public XmlParser(boolean ignoreNamespaces) {
this();
this.ignoreNamespaces = ignoreNamespaces;
}
/**
* Parse a string containing XML into a tree of XMLElement nodes.
*
* @param xml
* @return XMLElement
* @throws IOException
*/
public static XmlNode parse(String xml) throws IOException {
XmlParser p = new XmlParser();
return p.parse(new StringReader(xml));
}
/**
* Parse a string containing XML into a tree of XMLElement nodes.
*
* @param xml
* @return XMLElement
* @throws IOException
*/
public static XmlNode parse(String xml, boolean ignoreNamespaces) throws IOException {
XmlParser p = new XmlParser(ignoreNamespaces);
return p.parse(new StringReader(xml));
}
/**
* Parse a string containing XML into a tree of XMLElement nodes.
*
* @param xml
* @return XMLElement
* @throws IOException
*/
public static XmlNode parse(Reader reader, boolean ignoreNamespaces) throws IOException {
XmlParser p = new XmlParser(ignoreNamespaces);
return p.parse(reader);
}
public XmlNode parse(Reader reader) throws IOException {
if (!(reader.markSupported())) {
this.reader = new BufferedReader(reader);
} else {
this.reader = reader;
}
// skip xml declaration or DocTypes
skipPrologs();
while (true) {
int index;
String tagName;
// remove the prepend or trailing white spaces
String currentTag = readTag().trim();
if (currentTag.startsWith("</")) {
// close tag
tagName = currentTag.substring(2, currentTag.length() - 1);
if (ignoreNamespaces) {
tagName = stripNamespace(tagName);
}
// no open tag
if (currentElement == null) {
throw new IOException("Got close tag '" + tagName + "' without open tag.");
}
// close tag does not match with open tag
if (!tagName.equals(currentElement.getName())) {
throw new IOException("Expected close tag for '" + currentElement.getName() + "' but got '" + tagName + "'.");
}
if (elements.empty()) {
// document processing is over
return currentElement;
} else {
// pop up the previous open tag
currentElement = (XmlNode) elements.pop();
}
} else {
// open tag or tag with both open and close tags
index = currentTag.indexOf(" ");
if (index < 0) {
// tag with no attributes
if (currentTag.endsWith("/>")) {
// close tag as well
tagName = currentTag.substring(1, currentTag.length() - 2);
currentTag = "/>";
} else {
// open tag
tagName = currentTag.substring(1, currentTag.length() - 1);
if (ignoreNamespaces) {
tagName = stripNamespace(tagName);
}
currentTag = "";
}
} else {
// tag with attributes
tagName = currentTag.substring(1, index);
if (ignoreNamespaces) {
tagName = stripNamespace(tagName);
}
currentTag = currentTag.substring(index + 1);
}
// create new element
XmlNode element = new XmlNode(tagName);
// parse the attributes
boolean isTagClosed = false;
while (currentTag.length() > 0) {
// remove the prepend or trailing white spaces
currentTag = currentTag.trim();
if (currentTag.equals("/>")) {
// close tag
isTagClosed = true;
break;
} else if (currentTag.equals(">")) {
// open tag
break;
}
index = currentTag.indexOf("=");
if (index < 0) {
throw new IOException("Invalid attribute for tag '" + tagName + "'.");
}
// get attribute name
String attributeName = currentTag.substring(0, index);
if (ignoreNamespaces) {
attributeName = stripNamespace(attributeName);
}
currentTag = currentTag.substring(index + 1);
// get attribute value
String attributeValue;
boolean isQuoted = true;
if (currentTag.startsWith("\"")) {
index = currentTag.indexOf('"', 1);
} else if (currentTag.startsWith("'")) {
index = currentTag.indexOf('\'', 1);
} else {
isQuoted = false;
index = currentTag.indexOf(' ');
if (index < 0) {
index = currentTag.indexOf('>');
if (index < 0) {
index = currentTag.indexOf('/');
}
}
}
if (index < 0) {
throw new IOException("Invalid attribute for tag '" + tagName + "'.");
}
if (isQuoted) {
attributeValue = currentTag.substring(1, index);
} else {
attributeValue = currentTag.substring(0, index);
}
// add attribute to the new element
element.setAttribute(attributeName, attributeValue);
currentTag = currentTag.substring(index + 1);
}
// read the text between the open and close tag
if (!isTagClosed) {
element.setValue(readText());
}
// add new element as a child element of
// the current element
if (currentElement != null) {
element.setParent(currentElement);
}
if (!isTagClosed) {
if (currentElement != null) {
elements.push(currentElement);
}
currentElement = element;
} else if (currentElement == null) {
// only has one tag in the document
return element;
}
}
}
}
private String stripNamespace(String tagName) {
int i = tagName.indexOf(':');
if (i > -1) {
return tagName.substring(i + 1);
}
return tagName;
}
private int peek() throws IOException {
reader.mark(1);
int result = reader.read();
reader.reset();
return result;
}
private void peek(int[] buffer) throws IOException {
reader.mark(buffer.length);
for (int i = 0; i < buffer.length; i++) {
buffer[i] = reader.read();
}
reader.reset();
}
private void skipWhitespace() throws IOException {
while (Character.isWhitespace((char) peek())) {
reader.read();
}
}
private void skipProlog() throws IOException {
// skip "<?" or "<!"
reader.skip(2);
while (true) {
int next = peek();
if (next == '>') {
reader.read();
break;
} else if (next == '<') {
// nesting prolog
skipProlog();
} else {
reader.read();
}
}
}
private void skipPrologs() throws IOException {
while (true) {
skipWhitespace();
int[] next = new int[2];
peek(next);
if (next[0] != '<') {
throw new IOException("Expected '<' but got '" + (char) next[0] + "'.");
}
if ((next[1] == '?') || (next[1] == '!')) {
skipProlog();
} else {
break;
}
}
}
private String readTag() throws IOException {
skipWhitespace();
StringBuffer sb = new StringBuffer();
int next = peek();
if (next != '<') {
throw new IOException("Expected < but got " + (char) next);
}
sb.append((char) reader.read());
while (peek() != '>') {
sb.append((char) reader.read());
}
sb.append((char) reader.read());
return sb.toString();
}
private String readText() throws IOException {
StringBuffer sb = new StringBuffer();
int[] next = new int[cdata_start.length];
peek(next);
if (compareIntArrays(next, cdata_start) == true) {
// CDATA
reader.skip(next.length);
int[] buffer = new int[cdata_end.length];
while (true) {
peek(buffer);
if (compareIntArrays(buffer, cdata_end) == true) {
reader.skip(buffer.length);
break;
} else {
sb.append((char) reader.read());
}
}
} else {
while (peek() != '<') {
sb.append((char) reader.read());
}
}
String text = sb.toString();
if (text.trim().length() == 0) {
return null;
}
return text;
}
private boolean compareIntArrays(int[] a1, int[] a2) {
if (a1.length != a2.length) {
return false;
}
for (int i = 0; i < a1.length; i++) {
if (a1[i] != a2[i]) {
return false;
}
}
return true;
}
public boolean isIgnoreNamespaces() {
return ignoreNamespaces;
}
public void setIgnoreNamespaces(boolean ignoreNamespaces) {
this.ignoreNamespaces = ignoreNamespaces;
}
public static void main(String[] args) throws IOException {
XmlNode xn = parse("<error>org.osgi.framework.InvalidSyntaxException: Null or empty filter." +
"at org.apache.felix.framework.FilterImpl.<init>(FilterImpl.java:48)" +
"at org.apache.felix.framework.BundleContextImpl.createFilter(BundleContextImpl.java:101)" +
"at org.apache.felix.cm.impl.ConfigurationManager.listConfigurations(ConfigurationManager.java:503)" +
"at org.apache.felix.cm.impl.ConfigurationAdminImpl.listConfigurations(ConfigurationAdminImpl.java:124)" +
"at com.buglabs.bug.program.ConfigAdminServlet.getConfigurationXml(ConfigAdminServlet.java:161)" +
"at com.buglabs.bug.program.ConfigAdminServlet.doGet(ConfigAdminServlet.java:70)" +
"at javax.servlet.http.HttpServlet.service(HttpServlet.java:686)" +
"at javax.servlet.http.HttpServlet.service(HttpServlet.java:788)" +
"at com.buglabs.osgi.http.HttpServer.processRequest(HttpServer.java:433)" +
"at com.buglabs.osgi.http.HttpServer.process(HttpServer.java:254)" +
"at com.buglabs.osgi.http.HttpServer.run(HttpServer.java:94)" +
"</error>");
System.out.println(xn.toString());
}
}