// Copyright FreeHEP, 2007
package org.freehep.wbxml;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.xml.sax.SAXException;
/**
* SAX-like Binary XML Parser. There is NO support for namespaces, attrPrefixValues or attrValues.
* Code pages are handled internally. Both attribute and tag code indexes start at 0 and run up.
*
* @author Mark Donszelmann
* @version $Id: WbxmlParser.java 8584 2006-08-10 23:06:37Z duns $
*/
public class WBXMLParser implements WBXML {

    /** Stream of the document currently being parsed; (re)assigned by {@link #parse(InputStream)}. */
    private DataInputStream in;

    /** Receives document, element and character events. */
    private ContentHandler contentHandler;

    /** Receives EXT_*, EXT_I_*, EXT_T_* and OPAQUE tokens. */
    private ExtensionHandler extensionHandler;

    /** String table of the current document, keyed by the byte offset at which each string starts. */
    private Map/*<Integer, String>*/ stringTable;

    /** First header byte of the last parsed document. */
    private int version;

    /** Public identifier read from the header; 0 means the identifier is a string table reference. */
    private int publicIdentifierId;

    /** Character set value read from the header. */
    private int charSet;

    /** Code page currently selected for tags. */
    private int tagPage;

    /** Code page currently selected for attributes. */
    private int attributePage;

    /** Tag ids (as Integer) of the elements with content that are currently open. */
    private Stack stack = new Stack();

    /** Optional resolver that is informed about the DTD named in the header; may be null. */
    private EntityResolver resolver;

    /**
     * Creates a parser that reports to the given content handler and uses a
     * {@link DefaultExtensionHandler} for extension tokens.
     *
     * @param contentHandler receiver of the parse events
     */
    public WBXMLParser(ContentHandler contentHandler) {
        this(contentHandler, new DefaultExtensionHandler());
    }

    /**
     * Creates a parser that reports to the given content and extension handlers.
     *
     * @param contentHandler receiver of the parse events
     * @param extensionHandler receiver of the extension and opaque tokens
     */
    public WBXMLParser(ContentHandler contentHandler, ExtensionHandler extensionHandler) {
        this.contentHandler = contentHandler;
        this.extensionHandler = extensionHandler;
    }

    /**
     * Sets the resolver that is called with the DTD reference found in the document header.
     *
     * @param resolver the resolver to call, or null to ignore the DTD reference
     */
    public void setEntityResolver(EntityResolver resolver) {
        this.resolver = resolver;
    }

    /**
     * @return the character set value read from the header of the last parsed document
     */
    public int getCharSet() {
        return charSet;
    }

    /**
     * @return the version byte read from the header of the last parsed document
     */
    public int getVersion() {
        return version;
    }

    /**
     * Parses a binary XML document and reports its content to the handlers.
     * The header (version, public identifier, character set, string table) is read first,
     * then the body is processed token by token until the end of the stream.
     *
     * @param in stream to read the document from; it is not closed by this method
     * @throws SAXException if the document is malformed (unexpected end of stream inside a
     *         token, END or extension token without an open element, unknown string table
     *         reference, unclosed elements, processing instruction) or a handler fails
     * @throws IOException if reading from the stream fails
     */
    public void parse(InputStream in) throws SAXException, IOException {
        this.in = in instanceof DataInputStream ? (DataInputStream) in
                : new DataInputStream(in);

        // Reset all per-document state, so that a previous parse that was aborted by an
        // exception cannot leak open elements or code pages into this one.
        tagPage = 0;
        attributePage = 0;
        stack.clear();

        version = readByte();
        publicIdentifierId = readInt();
        int dtdIndex = 0;
        if (publicIdentifierId == 0) {
            dtdIndex = readInt();
        }
        charSet = readInt();
        readStringTable();

        contentHandler.startDocument();

        // The DTD string is only needed when somebody is interested in it.
        if (publicIdentifierId == 0 && resolver != null) {
            resolveDtd(dtdIndex);
        }

        while (true) {
            int id = this.in.read();
            if (id == -1) {
                // a regular end of stream between two tokens ends the document
                break;
            }
            switch (id) {
            case SWITCH_PAGE:
                tagPage = readByte();
                break;
            case END:
                if (stack.isEmpty()) {
                    throw new SAXException("END token without an open element");
                }
                contentHandler.endElement(((Integer) stack.pop()).intValue());
                break;
            case ENTITY: {
                // a fresh array per entity, exactly as long as the character data
                char[] entity = entityChars(readInt());
                contentHandler.characters(entity, 0, entity.length);
                break;
            }
            case STR_I: {
                String s = readStrI();
                contentHandler.characters(s.toCharArray(), 0, s.length());
                break;
            }
            case EXT_I_0:
            case EXT_I_1:
            case EXT_I_2:
            case EXT_T_0:
            case EXT_T_1:
            case EXT_T_2:
            case EXT_0:
            case EXT_1:
            case EXT_2:
            case OPAQUE: {
                if (stack.isEmpty()) {
                    throw new SAXException("Extension token 0x"
                            + Integer.toHexString(id) + " without an open element");
                }
                // extensions in content belong to the innermost open element
                int tagID = ((Integer) stack.peek()).intValue();
                handleExtensions(id, tagID, -1, null, null);
                break;
            }
            case PI:
                throw new SAXException("PI Not Supported");
            case STR_T: {
                String str = readStrT();
                contentHandler.characters(str.toCharArray(), 0, str.length());
                break;
            }
            default:
                readElement(id);
            }
        }

        if (stack.size() != 0) {
            throw new SAXException("unclosed elements: " + stack);
        }
        contentHandler.endDocument();
    }

    /**
     * Reads the string table from the header into {@link #stringTable}. Every string is read
     * with {@link DataInputStream#readUTF()} and is followed by one terminator byte; it is
     * stored under the byte offset at which it starts within the table.
     */
    private void readStringTable() throws SAXException, IOException {
        stringTable = new HashMap();
        int len = readInt();
        int offset = 0;
        while (offset < len) {
            String s = in.readUTF();
            stringTable.put(new Integer(offset), s);
            in.read(); // skip NULL termination
            // len (short) + encoded characters + null (byte)
            offset += stringUTFLength(s) + 2 + 1;
        }
    }

    /**
     * Looks up the DTD string in the string table, splits it at the first space and hands
     * both parts to the resolver. Must only be called when a resolver is set.
     *
     * @param dtdIndex string table offset of the DTD string
     * @throws SAXException if there is no such string or it does not consist of two parts
     */
    private void resolveDtd(int dtdIndex) throws SAXException, IOException {
        String dtd = (String) stringTable.get(new Integer(dtdIndex));
        if (dtd == null) {
            throw new SAXException("No string table entry at index " + dtdIndex
                    + " for the DTD");
        }
        String[] dtdPair = dtd.split(" ", 2);
        if (dtdPair.length < 2) {
            throw new SAXException("Malformed DTD entry in string table: '" + dtd + "'");
        }
        // FIXME, use the resolver stream
        resolver.resolveEntity(dtdPair[0], null, dtdPair[1]);
    }

    /**
     * Converts the code of a character entity into UTF-16 characters. Codes above U+FFFF
     * become a surrogate pair instead of being truncated to 16 bits.
     *
     * @param codePoint code read from an ENTITY token
     * @return array of one character, or of two characters for a supplementary code point
     * @throws SAXException if the code is negative or above U+10FFFF
     */
    private static char[] entityChars(int codePoint) throws SAXException {
        if (codePoint >= 0 && codePoint < 0x10000) {
            return new char[] { (char) codePoint };
        }
        if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
            int offset = codePoint - 0x10000;
            return new char[] { (char) (0xD800 + (offset >> 10)),
                    (char) (0xDC00 + (offset & 0x3FF)) };
        }
        throw new SAXException("Invalid character entity code: " + codePoint);
    }

    /**
     * Reads the payload of an extension or opaque token and forwards it to the extension
     * handler. Tokens other than the extension and opaque tokens are ignored.
     *
     * @param id the token that was read
     * @param tagID id of the element the token belongs to
     * @param attributeID id of the attribute the token belongs to, -1 when used in content
     * @param atts attributes read so far for the element, null when used in content
     * @param value values read so far for the attribute, null when used in content
     */
    private void handleExtensions(int id, int tagID, int attributeID,
            MutableAttributes atts, List value) throws SAXException,
            IOException {
        switch (id) {
        case EXT_I_0:
        case EXT_I_1:
        case EXT_I_2:
            extensionHandler.extI(id - EXT_I_0, readStrI(), tagID, attributeID,
                    atts, value);
            break;
        case EXT_T_0:
        case EXT_T_1:
        case EXT_T_2:
            extensionHandler.extT(id - EXT_T_0, readInt(), tagID, attributeID,
                    atts, value);
            break;
        case EXT_0:
        case EXT_1:
        case EXT_2:
            extensionHandler.ext(id - EXT_0, tagID, attributeID, atts, value);
            break;
        case OPAQUE:
            // the handler gets the stream together with the announced length
            int len = readInt();
            extensionHandler.opaque(len, in, tagID, attributeID, atts, value);
            break;
        }
    }

    /**
     * Reads the attribute list of an element up to and including its END token.
     * An attribute with exactly one value is stored as int, char or String, an attribute
     * with several values as a List. An attribute without values is not stored here.
     *
     * @param tagID id of the element the attributes belong to
     * @return the attributes that were read
     */
    private Attributes readAttr(int tagID) throws SAXException, IOException {
        AttributesImpl result = new AttributesImpl();
        int id = readByte();
        while (id != END) {
            // attribute start, possibly preceded by code page switches
            while (id == SWITCH_PAGE) {
                attributePage = readByte();
                id = readByte();
            }
            int attributeID = getAttributeId(id);
            List value = new ArrayList();

            // attribute value(s): everything up to the next attribute start or END
            id = readByte();
            while (id > 128 || id == SWITCH_PAGE || id == ENTITY || id == STR_I || id == STR_T
                    || (id >= EXT_I_0 && id <= EXT_I_2)
                    || (id >= EXT_T_0 && id <= EXT_T_2)) {
                switch (id) {
                case SWITCH_PAGE:
                    attributePage = readByte();
                    break;
                case ENTITY: {
                    char[] entity = entityChars(readInt());
                    if (entity.length == 1) {
                        value.add(new Character(entity[0]));
                    } else {
                        // a surrogate pair does not fit into a Character
                        value.add(new String(entity));
                    }
                    break;
                }
                case STR_I:
                    value.add(readStrI());
                    break;
                case EXT_I_0:
                case EXT_I_1:
                case EXT_I_2:
                case EXT_T_0:
                case EXT_T_1:
                case EXT_T_2:
                case EXT_0:
                case EXT_1:
                case EXT_2:
                case OPAQUE:
                    // the handler may set the attribute itself or add to the value list
                    handleExtensions(id, tagID, attributeID, result, value);
                    break;
                case STR_T:
                    value.add(readStrT());
                    break;
                default:
                    value.add(new Integer(getAttributeId(id)));
                }
                id = readByte();
            }

            switch (value.size()) {
            case 0:
                // already handled
                break;
            case 1:
                Object o = value.get(0);
                if (o instanceof Integer) {
                    result.set(attributeID, ((Integer) o).intValue());
                } else if (o instanceof Character) {
                    result.set(attributeID, ((Character) o).charValue());
                } else if (o instanceof String) {
                    result.set(attributeID, (String) o);
                } else {
                    throw new IOException(getClass() + ": Type " + o.getClass()
                            + " not properly handled.");
                }
                break;
            default:
                result.set(attributeID, value);
                break;
            }
        }
        return result;
    }

    /**
     * Maps a tag token to its tag id: the lower six bits of the token, offset by the
     * current tag code page and reduced by the number of reserved codes.
     */
    private int getTagId(int id) {
        return (id & 0x03f) + (tagPage * MAX_CODES) - RESERVED_CODES;
    }

    /**
     * Maps an attribute token to its attribute id: the lower six bits of the token, offset
     * by the current attribute code page and reduced by the number of reserved codes.
     */
    private int getAttributeId(int id) {
        return (id & 0x03f) + (attributePage * MAX_CODES) - RESERVED_CODES;
    }

    /**
     * Handles a tag token: reads the attributes if the token announces any and reports the
     * start of the element. Elements with content are remembered until their END token.
     *
     * @param id the tag token including its CONTENT and ATTRIBUTE flags
     */
    private void readElement(int id) throws IOException, SAXException {
        int tagID = getTagId(id);
        boolean empty = (id & CONTENT) == 0;
        if (!empty) {
            // pushed before the attributes are read, so that it is open for the handlers
            stack.push(new Integer(tagID));
        }
        Attributes attributes = ((id & ATTRIBUTE) != 0) ? readAttr(tagID)
                : new AttributesImpl();
        contentHandler.startElement(tagID, attributes, empty);
    }

    /**
     * Reads one byte which has to be present.
     *
     * @return the byte as a value from 0 to 255
     * @throws SAXException if the end of the stream is reached
     * @throws IOException if reading from the stream fails
     */
    protected int readByte() throws IOException, SAXException {
        int i = in.read();
        if (i == -1) {
            throw new SAXException("Unexpected EOF");
        }
        return i;
    }

    /**
     * Reads a multi-byte integer: seven bits per byte, most significant group first, the
     * high bit of a byte signals that another byte follows.
     *
     * @return the decoded value
     * @throws SAXException if the end of the stream is reached within the value
     * @throws IOException if reading from the stream fails
     */
    protected int readInt() throws SAXException, IOException {
        int result = 0;
        int i;
        do {
            i = readByte();
            result = (result << 7) | (i & 0x7f);
        } while ((i & 0x80) != 0);
        return result;
    }

    /**
     * Reads an inline string: a string as read by {@link DataInputStream#readUTF()},
     * followed by one terminator byte which is skipped.
     *
     * @return the string
     * @throws IOException if reading from the stream fails
     */
    protected String readStrI() throws IOException, SAXException {
        String s = in.readUTF();
        in.read(); // skip NULL Termination
        return s;
    }

    /**
     * Reads a string table reference and returns the referenced string.
     *
     * @return the string that starts at the referenced offset of the string table
     * @throws SAXException if no string starts at the referenced offset
     * @throws IOException if reading from the stream fails
     */
    protected String readStrT() throws IOException, SAXException {
        int index = readInt();
        String s = (String) stringTable.get(new Integer(index));
        if (s == null) {
            throw new SAXException("No string table entry at index " + index);
        }
        return s;
    }

    /**
     * Calculates the number of bytes counted for the characters of a string: one byte below
     * 0x80, two bytes below 0x800 and three bytes for every other char. Chars of a surrogate
     * pair are counted separately, three bytes each.
     * <p>
     * NOTE(review): {@link DataInputStream#readUTF()} reads modified UTF-8, in which the
     * char 0x0000 takes two bytes while it is counted as one byte here. This is left as it
     * is, because the writing side presumably relies on the same count; to be confirmed.
     *
     * @param s the string to measure
     * @return the number of bytes, without any length prefix or terminator
     */
    public static int stringUTFLength(String s) {
        int bytesNeeded = 0;
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c < 0x80) {
                ++bytesNeeded;
            } else if (c < 0x0800) {
                bytesNeeded += 2;
            } else {
                // a char never exceeds 0xFFFF, so there is no four byte case
                bytesNeeded += 3;
            }
        }
        return bytesNeeded;
    }

    /**
     * Parses the file given as first argument and dumps the parse events to System.err.
     *
     * @param args the name of the file to parse
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: WBXMLParser filename");
            System.exit(1);
        }

        ContentHandler contentHandler = new ContentHandler() {
            public void characters(char[] chars, int start, int len)
                    throws SAXException {
                // only the announced range of the array is character data
                System.err.print("'" + new String(chars, start, len) + "'");
            }

            public void endDocument() throws SAXException {
                System.err.println("END DOCUMENT");
            }

            public void endElement(int tagID) throws SAXException {
                System.err.println("</" + tagID + ">");
            }

            public void startDocument() throws SAXException {
                System.err.println("START DOCUMENT");
            }

            public void startElement(int tagID, Attributes attr, boolean empty)
                    throws SAXException {
                System.err.println("<" + tagID);
                System.err.println(attr.getTags().length);
                if (empty) System.err.print("/");
                System.err.println(">");
            }
        };

        WBXMLParser p = new WBXMLParser(contentHandler);
        InputStream file = new FileInputStream(args[0]);
        try {
            p.parse(file);
        } finally {
            // the parser does not close the stream it is given
            file.close();
        }
    }
}