package org.emdev.common.xml.parsers;
import java.util.Arrays;
import org.emdev.common.xml.IContentHandler;
import org.emdev.common.xml.IXmlTagFactory;
import org.emdev.common.xml.TextProvider;
import org.emdev.common.xml.tags.XmlTag;
import org.emdev.utils.StringUtils;
/**
 * Small, non-validating, in-place XML scanner.
 * <p>
 * The parser walks the character buffer of a {@link TextProvider} once and reports start tags, end tags
 * and character runs to an {@link IContentHandler}. Comments are skipped. Character and predefined entity
 * references found in text are decoded <em>in place</em>: the {@code '&'} is overwritten with the decoded
 * character and the remaining characters of the reference (up to and including {@code ';'}) are overwritten
 * with {@code 0}, so the buffer passed in is modified.
 */
public class DuckbillParser {

    /** Returned by {@link #decodeEntity} when the reference is not a recognised entity. */
    private static final int NO_ENTITY = -1;

    /**
     * Scans the document and feeds the handler.
     *
     * @param text    provider whose {@code chars} buffer (first {@code size} characters) holds the document;
     *                the buffer is modified when entity references are decoded
     * @param factory resolves a tag name (given as buffer, offset, length) to an {@link XmlTag}
     * @param handler receives {@code startElement}, {@code endElement} and {@code characters} callbacks
     * @throws Exception whatever the handler, the factory or the numeric entity parsing throws
     */
    public void parse(final TextProvider text, final IXmlTagFactory factory, final IContentHandler handler)
            throws Exception {
        final char[] xmlChars = text.chars;
        final int length = text.size;
        final XmlReader r = new XmlReader(xmlChars, length);

        // Offset of the first character of the pending text run, or -1 when there is none.
        int charsStart = -1;

        while (r.XmlOffset < length) {
            if (r.skipChar('<')) {
                // Any markup terminates the pending text run; the run ends just before the '<'.
                if (charsStart != -1) {
                    if (!handler.skipCharacters()) {
                        handler.characters(text, charsStart, r.XmlOffset - 1 - charsStart);
                    }
                    charsStart = -1;
                }

                // COMMENT: "<!--" ... "-->"
                if (r.lookingAt("!--")) {
                    r.skipComment();
                    continue;
                }

                // END_TAG: "</name ... >"
                if (r.skipChar('/')) {
                    final int tagNameStart = r.XmlOffset;
                    r.skipTagName();
                    final XmlTag tag = factory.getTagByName(r.XmlDoc, tagNameStart, r.XmlOffset - tagNameStart);
                    handler.endElement(tag);
                    r.skipTo('>');
                    r.XmlOffset++;
                    continue;
                }

                // START_TAG: "<name attrs>" or "<name attrs/>"
                final int tagNameStart = r.XmlOffset;
                r.skipTagName();
                final XmlTag tag = factory.getTagByName(r.XmlDoc, tagNameStart, r.XmlOffset - tagNameStart);

                if (handler.parseAttributes(tag)) {
                    final String[] attributes = r.fillAttributes(tag);
                    handler.startElement(tag, attributes);
                } else {
                    handler.startElement(tag);
                }

                r.skipToEndTag();
                // Self-closing tag: report the matching end immediately.
                if (r.skipChar('/') && r.skipChar('>')) {
                    handler.endElement(tag);
                    continue;
                }
                // Otherwise we are on '>' and the increment below steps over it.
            } else {
                if (charsStart == -1) {
                    charsStart = r.XmlOffset;
                }
                if (r.XmlDoc[r.XmlOffset] == '&') {
                    replaceEntity(r);
                }
            }
            // Next token
            r.XmlOffset++;
        }
    }

    /**
     * Decodes the entity reference starting at the reader's current offset (which must point at
     * {@code '&'}) and rewrites the buffer in place.
     * <p>
     * On success the {@code '&'} is replaced by the decoded character and the rest of the reference,
     * including the terminating {@code ';'}, is zero-filled. If no {@code ';'} follows, or the reference is
     * not recognised, the buffer is left untouched and the offset is restored to the {@code '&'} so that
     * the caller's increment moves on to the very next character.
     */
    private static void replaceEntity(final XmlReader r) throws Exception {
        final int startOfEntity = r.XmlOffset;
        if (!r.skipTo(';')) {
            r.XmlOffset = startOfEntity;
            return;
        }
        final int endOfEntity = r.XmlOffset;

        r.XmlOffset = startOfEntity + 1;
        final int entity = decodeEntity(r, endOfEntity);
        if (entity == NO_ENTITY) {
            // Not an entity we know: treat the '&' as plain text and rescan what follows it.
            r.XmlOffset = startOfEntity;
            return;
        }

        r.XmlDoc[startOfEntity] = (char) entity;
        Arrays.fill(r.XmlDoc, startOfEntity + 1, endOfEntity + 1, (char) 0);
    }

    /**
     * Decodes the reference body located between the reader's current offset (just after {@code '&'}) and
     * {@code endOfEntity} (the index of the terminating {@code ';'}).
     * <p>
     * Numeric references ({@code #NNN}, {@code #xHHH}) are parsed with {@link StringUtils#parseInt} and
     * narrowed to a single {@code char}. Of the named references only {@code quot}, {@code amp},
     * {@code apos}, {@code lt} and {@code gt} are recognised.
     * The method is declared {@code throws Exception} like {@link #parse} because the exception contract of
     * the numeric parsing helper is defined outside this file.
     *
     * @return the decoded character as a non-negative {@code int}, or {@link #NO_ENTITY}
     */
    private static int decodeEntity(final XmlReader r, final int endOfEntity) throws Exception {
        if (r.skipChar('#')) {
            final int radix = (r.skipChar('x') || r.skipChar('X')) ? 16 : 10;
            // The cast keeps the result in 0..0xFFFF, so it can never collide with NO_ENTITY.
            return (char) StringUtils.parseInt(r.XmlDoc, r.XmlOffset, endOfEntity - r.XmlOffset, radix);
        }

        final int nameStart = r.XmlOffset;
        final int nameLength = endOfEntity - nameStart;
        if (r.regionEquals(nameStart, nameLength, "quot")) {
            return '"';
        }
        if (r.regionEquals(nameStart, nameLength, "amp")) {
            return '&';
        }
        if (r.regionEquals(nameStart, nameLength, "apos")) {
            return '\'';
        }
        if (r.regionEquals(nameStart, nameLength, "lt")) {
            return '<';
        }
        if (r.regionEquals(nameStart, nameLength, "gt")) {
            return '>';
        }
        return NO_ENTITY;
    }

    /**
     * Cursor over the document buffer. All scanning methods stop at {@code XmlLength} and never read
     * beyond it.
     */
    private static final class XmlReader {

        /** The document characters; modified in place by entity decoding. */
        public final char[] XmlDoc;

        /** Current read position. */
        public int XmlOffset = 0;

        /** Number of valid characters in {@link #XmlDoc}. */
        public final int XmlLength;

        public XmlReader(final char[] xmlDoc, final int xmlLength) {
            XmlDoc = xmlDoc;
            XmlLength = xmlLength;
        }

        /**
         * Consumes the current character if it equals {@code c}.
         *
         * @return {@code true} if the character matched and the offset was advanced
         */
        public boolean skipChar(final char c) {
            if (XmlOffset < XmlLength && XmlDoc[XmlOffset] == c) {
                XmlOffset++;
                return true;
            }
            return false;
        }

        /** Tells whether the characters at the current offset spell {@code s}; does not move the offset. */
        public boolean lookingAt(final String s) {
            return XmlOffset + s.length() <= XmlLength && regionEquals(XmlOffset, s.length(), s);
        }

        /**
         * Tells whether the {@code len} characters starting at {@code start} are exactly {@code s}.
         * The caller guarantees the region lies inside the document.
         */
        public boolean regionEquals(final int start, final int len, final String s) {
            if (len != s.length()) {
                return false;
            }
            for (int i = 0; i < len; i++) {
                if (XmlDoc[start + i] != s.charAt(i)) {
                    return false;
                }
            }
            return true;
        }

        /** Advances past the next {@code "-->"}, or to the end of the document if there is none. */
        public void skipComment() {
            while (XmlOffset < XmlLength) {
                if (lookingAt("-->")) {
                    XmlOffset += 3;
                    return;
                }
                XmlOffset++;
            }
        }

        /**
         * Advances to the end of a tag name: the first whitespace/separator control character
         * (0x09-0x0d, 0x1c-0x20), {@code '>'} or {@code "/>"}.
         */
        public void skipTagName() {
            while (XmlOffset < XmlLength) {
                final char c = XmlDoc[XmlOffset];
                final boolean separator = (c >= 0x1c && c <= 0x20) || (c >= 0x9 && c <= 0xd);
                if (separator || c == '>' || atSelfClosingEnd()) {
                    break;
                }
                XmlOffset++;
            }
        }

        /**
         * Advances to the next occurrence of {@code c}.
         *
         * @return {@code true} if found (offset points at it), {@code false} if the end was reached
         */
        public boolean skipTo(final char c) {
            while (XmlOffset < XmlLength) {
                if (XmlDoc[XmlOffset] == c) {
                    return true;
                }
                XmlOffset++;
            }
            return false;
        }

        /** Advances to the {@code '>'} or {@code "/>"} that closes the current tag. */
        public void skipToEndTag() {
            while (XmlOffset < XmlLength) {
                if (XmlDoc[XmlOffset] == '>' || atSelfClosingEnd()) {
                    break;
                }
                XmlOffset++;
            }
        }

        /** Tells whether the current offset is on the {@code '/'} of a {@code "/>"} pair. */
        private boolean atSelfClosingEnd() {
            return XmlDoc[XmlOffset] == '/' && XmlOffset + 1 < XmlLength && XmlDoc[XmlOffset + 1] == '>';
        }

        /**
         * Extracts the quoted attribute values of the tag whose attribute section starts at the current
         * offset. The offset itself is left unchanged.
         * <p>
         * Attribute names are reduced to the part after the last {@code ':'}. A value is stored at the
         * index where {@link Arrays#binarySearch(Object[], Object)} finds its name in
         * {@code tag.attributes} (which is therefore presumably kept sorted - confirm against
         * {@link XmlTag}); names that are not found are ignored. Parsing stops at the first malformed
         * attribute.
         *
         * @return an array parallel to {@code tag.attributes} with {@code null} for absent attributes, or
         *         {@code null} if the tag declares no attributes
         */
        public String[] fillAttributes(final XmlTag tag) {
            if (tag.attributes.length == 0) {
                return null;
            }
            final String[] res = new String[tag.attributes.length];

            // Measure the attribute section, then rewind so the caller sees an unchanged offset.
            final int start = XmlOffset;
            skipToEndTag();
            final int end = XmlOffset;
            XmlOffset = start;

            String attrs = new String(XmlDoc, start, end - start).trim();
            for (int index = attrs.indexOf("="); index > 0; index = attrs.indexOf("=")) {
                final String[] qName = attrs.substring(0, index).trim().split(":");
                // split() yields an empty array when the name consists only of ':'.
                final String attrName = qName.length > 0 ? qName[qName.length - 1] : "";

                attrs = attrs.substring(index + 1).trim();
                if (attrs.length() == 0) {
                    // '=' with nothing after it.
                    break;
                }
                final char quote = attrs.charAt(0);
                if (quote != '"' && quote != '\'') {
                    break;
                }
                final int qIndex = attrs.indexOf(quote, 1);
                if (qIndex <= 0) {
                    // Unterminated value.
                    break;
                }

                final int i = Arrays.binarySearch(tag.attributes, attrName);
                if (i >= 0) {
                    res[i] = attrs.substring(1, qIndex);
                }
                attrs = attrs.substring(qIndex + 1).trim();
            }
            return res;
        }
    }
}