/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.build.doc;
/**
 * This class implements a simple XML pull parser.
 * Only a subset of the XML pull parser API is implemented.
 * <p>
 * The parser works on a document that is completely held in memory and
 * reports one token per call to {@link #next()}. Entity references are not
 * resolved and namespaces are not mapped to URIs; prefixes are only split
 * from local names. Malformed input is reported by throwing a
 * {@link RuntimeException}.
 */
public class XMLParser {

    /**
     * This event type means an error occurred.
     */
    public static final int ERROR = 0;

    /**
     * This event type means a start element has been read.
     */
    public static final int START_ELEMENT = 1;

    /**
     * This event type means an end element has been read.
     */
    public static final int END_ELEMENT = 2;

    /**
     * This event type means a processing instruction has been read.
     */
    public static final int PROCESSING_INSTRUCTION = 3;

    /**
     * This event type means text has been read.
     */
    public static final int CHARACTERS = 4;

    /**
     * This event type means a comment has been read.
     */
    public static final int COMMENT = 5;

    // public static final int SPACE = 6;

    /**
     * This event type is used before reading.
     */
    public static final int START_DOCUMENT = 7;

    /**
     * This event type means the end of the document has been reached.
     */
    public static final int END_DOCUMENT = 8;

    // public static final int ENTITY_REFERENCE = 9;

    // public static final int ATTRIBUTE = 10;

    /**
     * This event type means a DTD element has been read.
     */
    public static final int DTD = 11;

    /** The complete document. */
    private final String xml;

    /** The index of the next character to read. */
    private int pos;

    /** The type of the most recently read token. */
    private int eventType;

    /** The text of the current token, or null if it has none. */
    private String currentText;

    /** The raw source text of the current token. */
    private String currentToken;

    /** The prefix and local name of the current start or end element. */
    private String prefix, localName;

    /**
     * The attributes of the current start element, stored as consecutive
     * (prefix, local name, value) triples.
     */
    private String[] attributeValues = new String[3];

    /** The number of used slots in {@link #attributeValues}. */
    private int currentAttribute;

    /**
     * Whether a self-closing element was just read, so that the next call to
     * {@link #next()} has to report the matching end element.
     */
    private boolean endElement;

    /** Whether attributes without a value are accepted. */
    private boolean html;

    /**
     * Construct a new XML parser.
     *
     * @param xml the document
     */
    public XMLParser(String xml) {
        this.xml = xml;
        eventType = START_DOCUMENT;
    }

    /**
     * Enable or disable HTML processing. When enabled, attributes don't need to
     * have values.
     *
     * @param html true if HTML processing is enabled.
     */
    public void setHTML(boolean html) {
        this.html = html;
    }

    /**
     * Store the prefix and local name of a new attribute, growing the
     * attribute array if there is no room for a complete triple.
     *
     * @param pre the prefix (empty if there is none)
     * @param name the local name
     */
    private void addAttributeName(String pre, String name) {
        // reserve room for the whole triple; the value is stored afterwards
        if (currentAttribute + 3 > attributeValues.length) {
            String[] temp = new String[attributeValues.length * 2];
            System.arraycopy(attributeValues, 0, temp, 0, attributeValues.length);
            attributeValues = temp;
        }
        attributeValues[currentAttribute++] = pre;
        attributeValues[currentAttribute++] = name;
    }

    /**
     * Store the value of the attribute whose name was added last.
     *
     * @param v the value
     */
    private void addAttributeValue(String v) {
        attributeValues[currentAttribute++] = v;
    }

    /**
     * Read the next character and advance the position.
     *
     * @return the character, or -1 at the end of the document
     */
    private int readChar() {
        if (pos >= xml.length()) {
            return -1;
        }
        return xml.charAt(pos++);
    }

    /**
     * Move the position back by one character.
     */
    private void back() {
        pos--;
    }

    /**
     * Report a parse error. The message contains up to 1000 characters of the
     * document starting at the current position.
     *
     * @param expected what the parser expected to find
     * @throws RuntimeException always
     */
    private void error(String expected) {
        throw new RuntimeException("Expected: " + expected + " got: " + xml.substring(pos, Math.min(pos + 1000, xml.length())));
    }

    /**
     * Read the given characters, failing if the document contains something
     * else at the current position.
     *
     * @param chars the expected characters
     */
    private void read(String chars) {
        for (int i = 0; i < chars.length(); i++) {
            if (readChar() != chars.charAt(i)) {
                error(chars);
            }
        }
    }

    /**
     * Skip all characters up to and including the space character.
     */
    private void skipSpaces() {
        while (pos < xml.length() && xml.charAt(pos) <= ' ') {
            pos++;
        }
    }

    /**
     * Read the next token and update the event type, text, token, name and
     * attributes accordingly.
     */
    private void read() {
        currentText = null;
        currentAttribute = 0;
        int tokenStart = pos;
        int ch = readChar();
        if (ch < 0) {
            eventType = END_DOCUMENT;
        } else if (ch == '<') {
            readMarkup();
        } else {
            // TODO need to replace &#xx;?
            readCharacters(tokenStart);
        }
        currentToken = xml.substring(tokenStart, pos);
    }

    /**
     * Read the token that follows an already consumed '&lt;'.
     */
    private void readMarkup() {
        int ch = readChar();
        if (ch < 0) {
            eventType = ERROR;
        } else if (ch == '?') {
            readProcessingInstruction();
        } else if (ch == '!') {
            readDeclaration();
        } else if (ch == '/') {
            readEndElement();
        } else {
            readStartElement();
        }
    }

    /**
     * Read a processing instruction; "&lt;?" has already been consumed. An
     * instruction whose text starts with "xml" is skipped: the following token
     * is read as well and reported instead, and the token text then covers
     * both.
     */
    private void readProcessingInstruction() {
        eventType = PROCESSING_INSTRUCTION;
        int textStart = pos;
        while (true) {
            int ch = readChar();
            if (ch < 0) {
                error("?>");
            }
            // only peek at the character after '?', so that "??>" still
            // terminates the instruction
            if (ch == '?' && pos < xml.length() && xml.charAt(pos) == '>') {
                pos++;
                break;
            }
        }
        if (xml.startsWith("xml", textStart)) {
            // the caller keeps its own token start, so the token text it
            // builds includes this instruction
            read();
        } else {
            // exclude the closing "?>"
            currentText = xml.substring(textStart, pos - 2);
        }
    }

    /**
     * Read a comment, a document type declaration, or a CDATA section;
     * "&lt;!" has already been consumed. For any other input the event type
     * is left unchanged.
     */
    private void readDeclaration() {
        int ch = readChar();
        if (ch == '-') {
            readComment();
        } else if (ch == 'D') {
            readDocType();
        } else if (ch == '[') {
            readCData();
        }
    }

    /**
     * Read a comment; "&lt;!-" has already been consumed. Two consecutive
     * dashes inside the comment must be followed by '&gt;'.
     */
    private void readComment() {
        eventType = COMMENT;
        if (readChar() != '-') {
            error("-");
        }
        int textStart = pos;
        while (true) {
            int ch = readChar();
            if (ch < 0) {
                error("-->");
            }
            if (ch == '-' && readChar() == '-') {
                read(">");
                break;
            }
        }
        // exclude the closing "-->"
        currentText = xml.substring(textStart, pos - 3);
    }

    /**
     * Read a document type declaration up to the next '&gt;' or the end of the
     * document; "&lt;!D" has already been consumed.
     */
    private void readDocType() {
        read("OCTYPE");
        eventType = DTD;
        while (true) {
            int ch = readChar();
            if (ch < 0 || ch == '>') {
                break;
            }
        }
    }

    /**
     * Read a CDATA section and report it as text; "&lt;![" has already been
     * consumed.
     */
    private void readCData() {
        read("CDATA[");
        int textStart = pos;
        eventType = CHARACTERS;
        while (true) {
            int ch = readChar();
            if (ch < 0) {
                error("]]>");
            } else if (ch != ']') {
                continue;
            }
            ch = readChar();
            if (ch < 0) {
                error("]]>");
            } else if (ch == ']') {
                // additional ']' characters belong to the text
                do {
                    ch = readChar();
                    if (ch < 0) {
                        error("]]>");
                    }
                } while (ch == ']');
                if (ch == '>') {
                    // exclude the closing "]]>"
                    currentText = xml.substring(textStart, pos - 3);
                    break;
                }
            }
        }
    }

    /**
     * Read an end element; "&lt;/" has already been consumed.
     */
    private void readEndElement() {
        int nameStart = pos;
        prefix = null;
        eventType = END_ELEMENT;
        while (true) {
            int ch = readChar();
            if (ch < 0) {
                error(">");
            } else if (ch == ':') {
                prefix = xml.substring(nameStart, pos - 1);
                // the position is already after the colon
                nameStart = pos;
            } else if (ch == '>') {
                localName = xml.substring(nameStart, pos - 1);
                break;
            } else if (ch <= ' ') {
                localName = xml.substring(nameStart, pos - 1);
                skipSpaces();
                read(">");
                break;
            }
        }
    }

    /**
     * Read a start element including its attributes; '&lt;' and the first
     * character of the name have already been consumed. For a self-closing
     * element, the end element is reported by the next call to
     * {@link #next()}.
     */
    private void readStartElement() {
        int nameStart = pos - 1;
        prefix = null;
        localName = null;
        eventType = START_ELEMENT;
        while (true) {
            int ch = readChar();
            if (ch < 0) {
                error(">");
            } else if (ch == ':') {
                prefix = xml.substring(nameStart, pos - 1);
                // the position is already after the colon
                nameStart = pos;
            } else if (ch <= ' ') {
                localName = xml.substring(nameStart, pos - 1);
                readAttributeValues();
                // this is the '/' or '>' the attribute reader stopped at
                ch = readChar();
            }
            if (ch == '/') {
                if (localName == null) {
                    localName = xml.substring(nameStart, pos - 1);
                }
                read(">");
                endElement = true;
                break;
            } else if (ch == '>') {
                if (localName == null) {
                    localName = xml.substring(nameStart, pos - 1);
                }
                break;
            }
        }
    }

    /**
     * Read text up to the next '&lt;' or the end of the document. The first
     * character has already been consumed.
     *
     * @param textStart the index of the first character of the text
     */
    private void readCharacters(int textStart) {
        eventType = CHARACTERS;
        int next = xml.indexOf('<', pos);
        pos = next < 0 ? xml.length() : next;
        currentText = xml.substring(textStart, pos);
    }

    /**
     * Read the attributes of a start element, up to but not including the
     * closing '/' or '&gt;'. Values may be enclosed in double or single
     * quotes. In HTML mode an attribute without a value is accepted; its
     * value is then its name as written in the document.
     */
    private void readAttributeValues() {
        while (true) {
            int start = pos;
            int ch = readChar();
            if (ch < 0) {
                error(">");
            } else if (ch <= ' ') {
                continue;
            } else if (ch == '/' || ch == '>') {
                back();
                return;
            }
            int end;
            int localNameStart = start;
            boolean noValue = false;
            while (true) {
                end = pos;
                ch = readChar();
                if (ch < 0) {
                    error("=");
                } else if (ch <= ' ') {
                    skipSpaces();
                    ch = readChar();
                    if (ch != '=') {
                        if (html) {
                            back();
                            noValue = true;
                        } else {
                            error("=");
                        }
                    }
                    break;
                } else if (ch == '=') {
                    break;
                } else if (ch == ':') {
                    localNameStart = pos;
                } else if (ch == '/' || ch == '>') {
                    if (html) {
                        back();
                        noValue = true;
                        break;
                    }
                    error("=");
                }
            }
            if (localNameStart == start) {
                addAttributeName("", xml.substring(localNameStart, end));
            } else {
                addAttributeName(xml.substring(start, localNameStart - 1), xml.substring(localNameStart, end));
            }
            if (!noValue) {
                skipSpaces();
                int quote = readChar();
                if (quote != '\"' && quote != '\'') {
                    error("\"");
                }
                start = pos;
                while (true) {
                    end = pos;
                    ch = readChar();
                    if (ch < 0) {
                        error(String.valueOf((char) quote));
                    } else if (ch == quote) {
                        break;
                    }
                }
            }
            // without a value, start and end still span the attribute name
            addAttributeValue(xml.substring(start, end));
        }
    }

    /**
     * Check if there are more tags to read. This includes the end element
     * that is still to be reported after a self-closing element.
     *
     * @return true if there are more tags
     */
    public boolean hasNext() {
        return endElement || pos < xml.length();
    }

    /**
     * Read the next tag.
     *
     * @return the event type of the next tag
     */
    public int next() {
        if (endElement) {
            // report the end of the self-closing element read before
            endElement = false;
            eventType = END_ELEMENT;
            currentToken = "";
        } else {
            read();
        }
        return eventType;
    }

    /**
     * Read the next start, end, or character tag. This method skips comments,
     * DTDs, and processing instructions.
     *
     * @return the event type of the next tag
     */
    public int nextTag() {
        while (true) {
            int type = next();
            if (type != COMMENT && type != DTD && type != PROCESSING_INSTRUCTION) {
                return type;
            }
        }
    }

    /**
     * Get the event type of the current token.
     *
     * @return the event type
     */
    public int getEventType() {
        return eventType;
    }

    /**
     * Get the current text. For comments, processing instructions and CDATA
     * sections this is the content without the surrounding delimiters.
     *
     * @return the text, or null if the current token has no text
     */
    public String getText() {
        return currentText;
    }

    /**
     * Get the current token text.
     *
     * @return the token
     */
    public String getToken() {
        return currentToken;
    }

    /**
     * Get the number of attributes.
     *
     * @return the attribute count
     */
    public int getAttributeCount() {
        return currentAttribute / 3;
    }

    /**
     * Get the prefix of the attribute.
     *
     * @param index the index of the attribute (starting with 0)
     * @return the prefix
     */
    public String getAttributePrefix(int index) {
        return attributeValues[index * 3];
    }

    /**
     * Get the local name of the attribute.
     *
     * @param index the index of the attribute (starting with 0)
     * @return the local name
     */
    public String getAttributeLocalName(int index) {
        return attributeValues[index * 3 + 1];
    }

    /**
     * Get the full name of the attribute. If there is no prefix, only the local
     * name is returned, otherwise the prefix, ':', and the local name.
     *
     * @param index the index of the attribute (starting with 0)
     * @return the full name
     */
    public String getAttributeName(int index) {
        String pre = getAttributePrefix(index);
        String name = getAttributeLocalName(index);
        return pre == null || pre.length() == 0 ? name : pre + ":" + name;
    }

    /**
     * Get the value of this attribute.
     *
     * @param index the index of the attribute (starting with 0)
     * @return the value
     */
    public String getAttributeValue(int index) {
        return attributeValues[index * 3 + 2];
    }

    /**
     * Get the value of this attribute.
     *
     * @param namespaceURI the namespace URI (currently ignored)
     * @param name the local name of the attribute
     * @return the value or null
     */
    public String getAttributeValue(String namespaceURI, String name) {
        int len = getAttributeCount();
        for (int i = 0; i < len; i++) {
            if (getAttributeLocalName(i).equals(name)) {
                return getAttributeValue(i);
            }
        }
        return null;
    }

    /**
     * Get the full name of the current start or end element. If there is no
     * prefix, only the local name is returned, otherwise the prefix, ':', and
     * the local name.
     *
     * @return the full name
     */
    public String getName() {
        return prefix == null || prefix.length() == 0 ? localName : prefix + ":" + localName;
    }

    /**
     * Get the local name of the current start or end element.
     *
     * @return the local name
     */
    public String getLocalName() {
        return localName;
    }

    /**
     * Get the prefix of the current start or end element.
     *
     * @return the prefix
     */
    public String getPrefix() {
        return prefix;
    }

    /**
     * Check if the current character tag only contains spaces or other
     * non-printable characters.
     *
     * @return if the trimmed text is empty; false if the current token has no
     *         text
     */
    public boolean isWhiteSpace() {
        String text = getText();
        return text != null && text.trim().length() == 0;
    }

    /**
     * Get the remaining XML text of the document.
     *
     * @return the remaining XML
     */
    public String getRemaining() {
        return xml.substring(pos);
    }

    /**
     * Get the current character position in the XML document.
     *
     * @return the position
     */
    public int getPos() {
        return pos;
    }

}