/**
* Copyright (C) 2010 Orbeon, Inc.
*
* This program is free software; you can redistribute it and/or modify it under the terms of the
* GNU Lesser General Public License as published by the Free Software Foundation; either version
* 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* The full text of the license is available at http://www.gnu.org/copyleft/lesser.html
*/
package org.orbeon.oxf.xml;
import org.orbeon.dom.Document;
import org.orbeon.oxf.common.OXFException;
import org.orbeon.oxf.pipeline.api.TransformerXMLReceiver;
import org.orbeon.oxf.processor.SAXLoggerProcessor;
import org.orbeon.oxf.xml.dom4j.LocationSAXContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
/**
 * SAXStore keeps a compact representation of SAX events sent to the ContentHandler interface.
 *
 * Events are recorded into a set of parallel, append-only buffers (event types, characters, lengths, locations,
 * attribute counts and strings) and can later be replayed, in full or from a {@link Mark}, to any XMLReceiver.
 *
 * Buffers grow by 50% whenever they are full. Still not the greatest way. Possibly, passed a threshold, say 10 MB
 * or 20 MB, we could use a linked list of such big blocks.
 *
 * TODO: Handling of system IDs is not optimal in memory as system IDs are unlikely to change much within a document.
 */
public class SAXStore extends ForwardingXMLReceiver implements Externalizable {

    // Event type codes stored in the event buffer
    public static final byte START_DOCUMENT = 0x00;
    public static final byte END_DOCUMENT = 0x01;
    public static final byte START_ELEMENT = 0x02;
    public static final byte END_ELEMENT = 0x03;
    public static final byte CHARACTERS = 0x04;
    public static final byte END_PREFIX_MAPPING = 0x05;
    public static final byte IGN_WHITESPACE = 0x06;
    public static final byte PI = 0x07;
    public static final byte SKIPPED_ENTITY = 0x09;
    public static final byte START_PREFIX_MAPPING = 0x0A;
    public static final byte COMMENT = 0x0B;

    private static final int INITIAL_SIZE = 10;

    // Largest capacity the growth policy will ask for on its own (some VMs reserve header words in arrays)
    private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;

    // One byte per recorded event
    private byte[] eventBuffer;
    private int eventBufferPosition;

    // Text of characters / ignorable whitespace / comment events, concatenated
    private char[] charBuffer;
    private int charBufferPosition;

    // Length of each run of text stored in charBuffer
    private int[] intBuffer;
    private int intBufferPosition;

    // Two entries (line, column) per event recorded with a location
    private int[] lineBuffer;
    private int lineBufferPosition;

    // One entry per event recorded with a location
    private String[] systemIdBuffer;
    private int systemIdBufferPosition;

    // Number of attributes of each recorded start element
    private int[] attributeCountBuffer;
    private int attributeCountBufferPosition;

    // Total number of attributes recorded
    private int attributeCount;

    // All string arguments of recorded events (names, prefixes, URIs, attribute data), in recording order
    private List<String> stringBuffer = new ArrayList<String>();

    private boolean hasDocumentLocator;
    private String publicId;

    private transient Locator locator; // used only for recording events, MUST be cleared afterwards

    private final Mark START_MARK = new Mark();

    private List<Mark> marks = null;

    /**
     * Snapshot of all the buffer positions of the enclosing SAXStore, from which events can be replayed.
     */
    public class Mark {

        public final String id;
        public final int eventBufferPosition;
        public final int charBufferPosition;
        public final int intBufferPosition;
        public final int lineBufferPosition;
        public final int systemIdBufferPosition;
        public final int attributeCountBufferPosition;
        // Position in the list of recorded strings (name kept as is because the field is public)
        public final int StringBuilderPosition;

        // Mark pointing at the very beginning of the store; it is not remembered in the list of marks
        private Mark() {
            this.id = null;
            this.eventBufferPosition = 0;
            this.charBufferPosition = 0;
            this.intBufferPosition = 0;
            this.lineBufferPosition = 0;
            this.systemIdBufferPosition = 0;
            this.attributeCountBufferPosition = 0;
            this.StringBuilderPosition = 0;
        }

        // Mark capturing the current positions of the given store
        private Mark(final SAXStore store, final String id) {
            this.id = id;
            this.eventBufferPosition = store.eventBufferPosition;
            this.charBufferPosition = store.charBufferPosition;
            this.intBufferPosition = store.intBufferPosition;
            this.lineBufferPosition = store.lineBufferPosition;
            this.systemIdBufferPosition = store.systemIdBufferPosition;
            this.attributeCountBufferPosition = store.attributeCountBufferPosition;
            this.StringBuilderPosition = store.stringBuffer.size();
            rememberMark();
        }

        // Mark rebuilt from the 7 positions in the order written by writeExternal()
        private Mark(final int[] values, final String id) {
            this.id = id;
            int i = 0;
            this.eventBufferPosition = values[i++];
            this.charBufferPosition = values[i++];
            this.intBufferPosition = values[i++];
            this.lineBufferPosition = values[i++];
            this.systemIdBufferPosition = values[i++];
            this.attributeCountBufferPosition = values[i++];
            this.StringBuilderPosition = values[i++];
            rememberMark();
        }

        private void rememberMark() {
            // Keep a reference to marks, so that they can be serialized/deserialized along with the SAXStore
            if (marks == null)
                marks = new ArrayList<Mark>();
            marks.add(this);
        }

        /**
         * Replay the events of the enclosing store starting at this mark.
         *
         * @param xmlReceiver   receiver of the replayed events
         * @throws SAXException if the receiver throws it
         */
        public void replay(XMLReceiver xmlReceiver) throws SAXException {
            SAXStore.this.replay(xmlReceiver, this);
        }

        /**
         * @return the SAXStore this mark belongs to
         */
        public SAXStore saxStore() {
            return SAXStore.this;
        }
    }

    /**
     * Return a rough estimate of the size of the recorded content.
     *
     * @return estimated size
     */
    public long getApproximateSize() {
        // All products are computed as long so that very large stores cannot overflow int
        long size = 4L * eventBufferPosition;
        size += charBufferPosition;
        size += 4L * intBufferPosition;
        size += 4L * lineBufferPosition;

        String previousId = null;
        for (int i = 0; i < systemIdBufferPosition; i++) {
            final String currentId = systemIdBuffer[i];
            // This is rough, but consecutive entries could point to the same string instance, so the identity
            // comparison is intentional.
            if (currentId != null && currentId != previousId)
                size += 2L * currentId.length();
            previousId = currentId;
        }

        size += 4L * attributeCountBufferPosition;

        String previousString = null;
        for (final String currentString : stringBuffer) {
            // Same rough detection of consecutive references to the same string instance
            if (currentString != null && currentString != previousString)
                size += 2L * currentString.length();
            previousString = currentString;
        }

        return size;
    }

    /**
     * @return the total number of attributes recorded
     */
    public int getAttributesCount() {
        return attributeCount;
    }

    /**
     * Create an empty store which does not forward events.
     */
    public SAXStore() {
        init();
    }

    /**
     * Create a store from content previously written with {@link #writeExternal(ObjectOutput)}.
     *
     * @param input         serialized content
     * @throws OXFException wrapping any exception thrown while reading
     */
    public SAXStore(ObjectInput input) {
        try {
            readExternal(input);
        } catch (Exception e) {
            throw new OXFException(e);
        }
    }

    /**
     * Create an empty store which also forwards all the events it records to the given receiver.
     *
     * @param xmlReceiver   receiver to forward events to
     */
    public SAXStore(XMLReceiver xmlReceiver) {
        super.setXMLReceiver(xmlReceiver);
        init();
    }

    /**
     * Return a validity object computed from the identity hash codes of the event, character and int buffers.
     *
     * @return a Long
     */
    public Object getValidity() {
        // The product is deliberately computed as int, as it always has been, and only then widened
        final int combined = eventBuffer.hashCode() * charBuffer.hashCode() * intBuffer.hashCode();
        return Long.valueOf(combined);
    }

    /**
     * Reset the store to its empty state: fresh buffers, no strings, no marks, no locator information.
     */
    protected void init() {
        eventBufferPosition = 0;
        eventBuffer = new byte[INITIAL_SIZE];

        charBufferPosition = 0;
        charBuffer = new char[INITIAL_SIZE * 4];

        intBufferPosition = 0;
        intBuffer = new int[INITIAL_SIZE];

        lineBufferPosition = 0;
        lineBuffer = new int[INITIAL_SIZE];

        systemIdBufferPosition = 0;
        systemIdBuffer = new String[INITIAL_SIZE];

        attributeCountBufferPosition = 0;
        attributeCountBuffer = new int[INITIAL_SIZE];

        stringBuffer.clear();

        // These describe the recorded content as well, so they must not survive a reset: marks would point into
        // buffers which no longer exist, and replay must not expect locations which are no longer recorded.
        attributeCount = 0;
        marks = null;
        hasDocumentLocator = false;
        publicId = null;

        locator = null;
    }

    /**
     * Replay all the recorded events.
     *
     * @param xmlReceiver   receiver of the replayed events
     * @throws SAXException if the receiver throws it
     */
    public void replay(XMLReceiver xmlReceiver) throws SAXException {
        replay(xmlReceiver, START_MARK);
    }

    /**
     * Replay the recorded events starting at the given mark.
     *
     * If the mark is not the start of the store and the first event at the mark is a start element, replay stops
     * after the matching end element. Otherwise replay continues until the last recorded event.
     *
     * @param xmlReceiver   receiver of the replayed events
     * @param mark          position to start from
     * @throws SAXException if the receiver throws it
     */
    public void replay(XMLReceiver xmlReceiver, Mark mark) throws SAXException {

        int intPos = mark.intBufferPosition;
        int charPos = mark.charBufferPosition;
        int stringPos = mark.StringBuilderPosition;
        int attributeCountPos = mark.attributeCountBufferPosition;
        int eventPos = mark.eventBufferPosition;

        // Single-element arrays so that the locator below can observe the positions as they advance
        final int[] linePos = { mark.lineBufferPosition };
        final int[] systemIdPos = { mark.systemIdBufferPosition };

        final AttributesImpl attributes = new AttributesImpl();

        if (hasDocumentLocator) {
            xmlReceiver.setDocumentLocator(new Locator() {
                public String getPublicId() {
                    return publicId;
                }

                public String getSystemId() {
                    return systemIdAt(systemIdPos[0]);
                }

                public int getLineNumber() {
                    return lineValueAt(linePos[0]);
                }

                public int getColumnNumber() {
                    // The column is stored right after the line
                    return lineValueAt(linePos[0] + 1);
                }
            });
        }

        // Handle element marks. The position is checked first as a mark can sit at the very end of the events.
        final boolean handleElementMark =
            mark != START_MARK
                && eventPos < eventBufferPosition
                && eventBuffer[eventPos] == START_ELEMENT;

        int elementLevel = 0;

        while (eventPos < eventBufferPosition) {
            final byte eventType = eventBuffer[eventPos];
            // No location is recorded for prefix mapping events
            final boolean eventHasLocation =
                hasDocumentLocator && eventType != END_PREFIX_MAPPING && eventType != START_PREFIX_MAPPING;

            switch (eventType) {
                case START_DOCUMENT: {
                    xmlReceiver.startDocument();
                    break;
                }
                case START_ELEMENT: {
                    final String namespaceURI = stringBuffer.get(stringPos++);
                    final String localName = stringBuffer.get(stringPos++);
                    final String qName = stringBuffer.get(stringPos++);
                    attributes.clear();
                    final int count = attributeCountBuffer[attributeCountPos++];
                    for (int i = 0; i < count; i++) {
                        final String attUri = stringBuffer.get(stringPos++);
                        final String attLocalName = stringBuffer.get(stringPos++);
                        final String attQName = stringBuffer.get(stringPos++);
                        final String attType = stringBuffer.get(stringPos++);
                        final String attValue = stringBuffer.get(stringPos++);
                        attributes.addAttribute(attUri, attLocalName, attQName, attType, attValue);
                    }
                    xmlReceiver.startElement(namespaceURI, localName, qName, attributes);
                    elementLevel++;
                    break;
                }
                case CHARACTERS: {
                    final int length = intBuffer[intPos++];
                    xmlReceiver.characters(charBuffer, charPos, length);
                    charPos += length;
                    break;
                }
                case END_ELEMENT: {
                    elementLevel--;
                    final String namespaceURI = stringBuffer.get(stringPos++);
                    final String localName = stringBuffer.get(stringPos++);
                    final String qName = stringBuffer.get(stringPos++);
                    xmlReceiver.endElement(namespaceURI, localName, qName);
                    if (handleElementMark && elementLevel == 0) {
                        // Back to ground level, we are done!
                        return;
                    }
                    break;
                }
                case END_DOCUMENT: {
                    xmlReceiver.endDocument();
                    break;
                }
                case END_PREFIX_MAPPING: {
                    xmlReceiver.endPrefixMapping(stringBuffer.get(stringPos++));
                    break;
                }
                case IGN_WHITESPACE: {
                    final int length = intBuffer[intPos++];
                    xmlReceiver.ignorableWhitespace(charBuffer, charPos, length);
                    charPos += length;
                    break;
                }
                case PI: {
                    final String target = stringBuffer.get(stringPos++);
                    final String data = stringBuffer.get(stringPos++);
                    xmlReceiver.processingInstruction(target, data);
                    break;
                }
                case SKIPPED_ENTITY: {
                    xmlReceiver.skippedEntity(stringBuffer.get(stringPos++));
                    break;
                }
                case START_PREFIX_MAPPING: {
                    final String prefix = stringBuffer.get(stringPos++);
                    final String uri = stringBuffer.get(stringPos++);
                    xmlReceiver.startPrefixMapping(prefix, uri);
                    break;
                }
                case COMMENT: {
                    final int length = intBuffer[intPos++];
                    xmlReceiver.comment(charBuffer, charPos, length);
                    charPos += length;
                    break;
                }
            }

            eventPos++;
            if (eventHasLocation) {
                linePos[0] += 2;
                systemIdPos[0]++;
            }
        }
    }

    // Value stored in the line buffer at the given index, or -1 if the index is outside of the buffer
    private int lineValueAt(int index) {
        return (index >= 0 && index < lineBuffer.length) ? lineBuffer[index] : -1;
    }

    // System id stored at the given index, or null if the index is outside of the buffer
    private String systemIdAt(int index) {
        return (index >= 0 && index < systemIdBuffer.length) ? systemIdBuffer[index] : null;
    }

    // Create a new mark
    // NOTE: This must be called *before* the startElement() event that will be the first element associated with the mark.
    public Mark getMark(String id) {
        return new Mark(this, id);
    }

    // Return all the marks created
    public List<Mark> getMarks() {
        return marks != null ? marks : Collections.<Mark>emptyList();
    }

    /**
     * Print to System.out. For debug only.
     */
    public void printOut() {
        try {
            final TransformerXMLReceiver th = TransformerUtils.getIdentityTransformerHandler();
            th.setResult(new StreamResult(System.out));
            th.startDocument();
            replay(th);
            th.endDocument();
        } catch (SAXException e) {
            throw new OXFException(e);
        }
    }

    /**
     * This outputs the content to the SAXLoggerProcessor logger. For debug only.
     */
    public void logContents() {
        try {
            replay(new SAXLoggerProcessor.DebugXMLReceiver());
        } catch (SAXException e) {
            throw new OXFException(e);
        }
    }

    /**
     * Discard all the recorded content.
     */
    public void clear() {
        init();
    }

    /**
     * Replay the recorded events into a LocationSAXContentHandler and return the document it built.
     *
     * @return the resulting document
     * @throws OXFException wrapping any SAXException thrown during replay
     */
    public Document getDocument() {
        try {
            final LocationSAXContentHandler ch = new LocationSAXContentHandler();
            replay(ch);
            return ch.getDocument();
        } catch (SAXException e) {
            throw new OXFException(e);
        }
    }

    // Recording methods: each one stores the event, then forwards it to the superclass

    @Override
    public void characters(char[] chars, int start, int length) throws SAXException {
        addToEventBuffer(CHARACTERS);
        addToCharBuffer(chars, start, length);
        addToIntBuffer(length);
        addLocation();
        super.characters(chars, start, length);
    }

    @Override
    public void endDocument() throws SAXException {
        addToEventBuffer(END_DOCUMENT);
        addLocation();
        super.endDocument();
        // The resulting SAXStore should never keep references to whoever filled it
        locator = null;
    }

    @Override
    public void endElement(String uri, String localname, String qName) throws SAXException {
        addToEventBuffer(END_ELEMENT);
        addLocation();
        stringBuffer.add(uri);
        stringBuffer.add(localname);
        stringBuffer.add(qName);
        super.endElement(uri, localname, qName);
    }

    @Override
    public void endPrefixMapping(String s) throws SAXException {
        addToEventBuffer(END_PREFIX_MAPPING);
        // NOTE: We don't keep location data for this event as it is very unlikely to be used
        stringBuffer.add(s);
        super.endPrefixMapping(s);
    }

    @Override
    public void ignorableWhitespace(char[] chars, int start, int length) throws SAXException {
        addToEventBuffer(IGN_WHITESPACE);
        addToCharBuffer(chars, start, length);
        addToIntBuffer(length);
        addLocation();
        super.ignorableWhitespace(chars, start, length);
    }

    @Override
    public void processingInstruction(String s, String s1) throws SAXException {
        addToEventBuffer(PI);
        addLocation();
        stringBuffer.add(s);
        stringBuffer.add(s1);
        super.processingInstruction(s, s1);
    }

    @Override
    public void setDocumentLocator(Locator locator) {
        this.hasDocumentLocator = locator != null;
        this.locator = locator;
        super.setDocumentLocator(locator);
    }

    @Override
    public void skippedEntity(String s) throws SAXException {
        addToEventBuffer(SKIPPED_ENTITY);
        addLocation();
        stringBuffer.add(s);
        super.skippedEntity(s);
    }

    @Override
    public void startDocument() throws SAXException {
        addToEventBuffer(START_DOCUMENT);
        addLocation();
        super.startDocument();
    }

    @Override
    public void startElement(String uri, String localname, String qName, Attributes attributes) throws SAXException {
        addToEventBuffer(START_ELEMENT);
        addLocation();
        // Remember the first non-null public id seen on a start element
        if (locator != null && publicId == null) {
            final String locatorPublicId = locator.getPublicId();
            if (locatorPublicId != null)
                publicId = locatorPublicId;
        }
        stringBuffer.add(uri);
        stringBuffer.add(localname);
        stringBuffer.add(qName);
        addToAttributeBuffer(attributes);
        super.startElement(uri, localname, qName, attributes);
    }

    @Override
    public void startPrefixMapping(String s, String s1) throws SAXException {
        addToEventBuffer(START_PREFIX_MAPPING);
        // NOTE: We don't keep location data for this event as it is very unlikely to be used
        stringBuffer.add(s);
        stringBuffer.add(s1);
        super.startPrefixMapping(s, s1);
    }

    @Override
    public void comment(char[] ch, int start, int length) throws SAXException {
        addToEventBuffer(COMMENT);
        addToCharBuffer(ch, start, length);
        addToIntBuffer(length);
        addLocation();
        super.comment(ch, start, length);
    }

    // Record line, column and system id of the current event if a locator is available
    private final void addLocation() {
        if (locator != null) {
            addToLineBuffer(locator.getLineNumber());
            addToLineBuffer(locator.getColumnNumber());
            addToSystemIdBuffer(locator.getSystemId());
        }
    }

    /**
     * Compute the capacity to allocate for a buffer which is too small.
     *
     * The buffer grows by 50% (plus one so that an empty buffer grows too), or to the required capacity if that
     * is larger. The computation is done with longs so that it cannot overflow.
     *
     * @param currentCapacity   current length of the buffer
     * @param requiredCapacity  minimum length needed
     * @return                  length of the new buffer
     * @throws OutOfMemoryError if the required capacity cannot be represented as an array length
     */
    private static int grownCapacity(int currentCapacity, long requiredCapacity) {
        if (requiredCapacity > Integer.MAX_VALUE)
            throw new OutOfMemoryError("SAXStore buffer would exceed the maximum array size: " + requiredCapacity);
        final long grown = (long) currentCapacity + (currentCapacity >> 1) + 1;
        return (int) Math.max(Math.min(grown, MAX_ARRAY_SIZE), requiredCapacity);
    }

    /**
     * Append characters to the character buffer, growing it if needed.
     */
    protected void addToCharBuffer(char[] chars, int start, int length) {
        final long required = (long) charBufferPosition + length;
        if (required > charBuffer.length) {
            // Grow once to a size which is known to fit, rather than repeatedly by 50%
            final char[] grown = new char[grownCapacity(charBuffer.length, required)];
            System.arraycopy(charBuffer, 0, grown, 0, charBufferPosition);
            charBuffer = grown;
        }
        System.arraycopy(chars, start, charBuffer, charBufferPosition, length);
        charBufferPosition += length;
    }

    /**
     * Append a value to the int buffer, growing it if needed.
     */
    protected void addToIntBuffer(int i) {
        if (intBufferPosition >= intBuffer.length) {
            final int[] grown = new int[grownCapacity(intBuffer.length, intBufferPosition + 1L)];
            System.arraycopy(intBuffer, 0, grown, 0, intBufferPosition);
            intBuffer = grown;
        }
        intBuffer[intBufferPosition++] = i;
    }

    /**
     * Append a value to the line buffer, growing it if needed.
     */
    protected void addToLineBuffer(int i) {
        if (lineBufferPosition >= lineBuffer.length) {
            final int[] grown = new int[grownCapacity(lineBuffer.length, lineBufferPosition + 1L)];
            System.arraycopy(lineBuffer, 0, grown, 0, lineBufferPosition);
            lineBuffer = grown;
        }
        lineBuffer[lineBufferPosition++] = i;
    }

    /**
     * Append a system id to the system id buffer, growing it if needed.
     */
    protected void addToSystemIdBuffer(String systemId) {
        // NOTE: Contiguous identical system ids are not detected. Doing so would require storing the number of
        // contiguous identical strings as well so that replay keeps working, and/or using intern().
        if (systemIdBufferPosition >= systemIdBuffer.length) {
            final String[] grown = new String[grownCapacity(systemIdBuffer.length, systemIdBufferPosition + 1L)];
            System.arraycopy(systemIdBuffer, 0, grown, 0, systemIdBufferPosition);
            systemIdBuffer = grown;
        }
        systemIdBuffer[systemIdBufferPosition++] = systemId;
    }

    /**
     * Append an event type to the event buffer, growing it if needed.
     */
    protected void addToEventBuffer(byte b) {
        if (eventBufferPosition >= eventBuffer.length) {
            final byte[] grown = new byte[grownCapacity(eventBuffer.length, eventBufferPosition + 1L)];
            System.arraycopy(eventBuffer, 0, grown, 0, eventBufferPosition);
            eventBuffer = grown;
        }
        eventBuffer[eventBufferPosition++] = b;
    }

    // Record the number of attributes, then URI, local name, qName, type and value of each attribute
    private void addToAttributeBuffer(Attributes attributes) {
        if (attributeCountBufferPosition >= attributeCountBuffer.length) {
            final int[] grown =
                new int[grownCapacity(attributeCountBuffer.length, attributeCountBufferPosition + 1L)];
            System.arraycopy(attributeCountBuffer, 0, grown, 0, attributeCountBufferPosition);
            attributeCountBuffer = grown;
        }
        final int count = attributes.getLength();
        attributeCountBuffer[attributeCountBufferPosition++] = count;
        attributeCount += count;
        for (int i = 0; i < count; i++) {
            stringBuffer.add(attributes.getURI(i));
            stringBuffer.add(attributes.getLocalName(i));
            stringBuffer.add(attributes.getQName(i));
            stringBuffer.add(attributes.getType(i));
            stringBuffer.add(attributes.getValue(i));
        }
    }

    /**
     * Write the recorded content: each buffer preceded by its size, then the locator flag, the public id and
     * the marks. Null system ids and a null public id are written as empty strings.
     *
     * @param out           destination
     * @throws IOException  if writing fails
     */
    public void writeExternal(ObjectOutput out) throws IOException {

        out.writeInt(eventBufferPosition);
        out.write(eventBuffer, 0, eventBufferPosition);

        out.writeInt(charBufferPosition);
        for (int i = 0; i < charBufferPosition; i++)
            out.writeChar(charBuffer[i]);

        out.writeInt(intBufferPosition);
        for (int i = 0; i < intBufferPosition; i++)
            out.writeInt(intBuffer[i]);

        out.writeInt(lineBufferPosition);
        for (int i = 0; i < lineBufferPosition; i++)
            out.writeInt(lineBuffer[i]);

        out.writeInt(systemIdBufferPosition);
        for (int i = 0; i < systemIdBufferPosition; i++) {
            final String systemId = systemIdBuffer[i];
            out.writeObject(systemId == null ? "" : systemId);
        }

        out.writeInt(attributeCountBufferPosition);
        for (int i = 0; i < attributeCountBufferPosition; i++)
            out.writeInt(attributeCountBuffer[i]);

        out.writeInt(stringBuffer.size());
        for (final String s : stringBuffer)
            out.writeObject(s);

        out.writeBoolean(hasDocumentLocator);
        out.writeObject(publicId == null ? "" : publicId);

        if (marks == null || marks.isEmpty()) {
            out.writeInt(0);
        } else {
            out.writeInt(marks.size());
            for (final Mark mark : marks) {
                out.writeObject(mark.id);
                out.writeInt(mark.eventBufferPosition);
                out.writeInt(mark.charBufferPosition);
                out.writeInt(mark.intBufferPosition);
                out.writeInt(mark.lineBufferPosition);
                out.writeInt(mark.systemIdBufferPosition);
                out.writeInt(mark.attributeCountBufferPosition);
                out.writeInt(mark.StringBuilderPosition);
            }
        }

        out.flush();
    }

    /**
     * Replace the content of this store with content written by {@link #writeExternal(ObjectOutput)}.
     *
     * @param in                        source
     * @throws IOException              if reading fails or a buffer size is negative
     * @throws ClassNotFoundException   if thrown while reading an object
     */
    public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {

        // Whatever this instance held before must not leak into the content being read
        stringBuffer.clear();
        attributeCount = 0;
        marks = null;
        locator = null;

        eventBufferPosition = readSize(in);
        eventBuffer = new byte[eventBufferPosition];
        in.readFully(eventBuffer, 0, eventBufferPosition);

        charBufferPosition = readSize(in);
        charBuffer = new char[charBufferPosition];
        for (int i = 0; i < charBufferPosition; i++)
            charBuffer[i] = in.readChar();

        intBufferPosition = readSize(in);
        intBuffer = new int[intBufferPosition];
        for (int i = 0; i < intBufferPosition; i++)
            intBuffer[i] = in.readInt();

        lineBufferPosition = readSize(in);
        lineBuffer = new int[lineBufferPosition];
        for (int i = 0; i < lineBufferPosition; i++)
            lineBuffer[i] = in.readInt();

        systemIdBufferPosition = readSize(in);
        systemIdBuffer = new String[systemIdBufferPosition];
        for (int i = 0; i < systemIdBufferPosition; i++) {
            final String systemId = (String) in.readObject();
            // Empty strings stand for null system ids
            systemIdBuffer[i] = "".equals(systemId) ? null : systemId;
        }

        attributeCountBufferPosition = readSize(in);
        attributeCountBuffer = new int[attributeCountBufferPosition];
        for (int i = 0; i < attributeCountBufferPosition; i++) {
            final int count = in.readInt();
            attributeCountBuffer[i] = count;
            attributeCount += count;
        }

        final int stringCount = in.readInt();
        for (int i = 0; i < stringCount; i++)
            stringBuffer.add((String) in.readObject());

        hasDocumentLocator = in.readBoolean();

        final String readPublicId = (String) in.readObject();
        // An empty string stands for a null public id
        publicId = "".equals(readPublicId) ? null : readPublicId;

        final int marksCount = in.readInt();
        for (int i = 0; i < marksCount; i++) {
            final String id = (String) in.readObject();
            final int[] values = new int[7];
            for (int j = 0; j < values.length; j++)
                values[j] = in.readInt();
            // The constructor registers the mark in the list of marks
            new Mark(values, id);
        }
    }

    // Read the size of a serialized buffer, rejecting negative values which can only come from corrupted data
    private static int readSize(ObjectInput in) throws IOException {
        final int size = in.readInt();
        if (size < 0)
            throw new StreamCorruptedException("Negative buffer size in serialized SAXStore: " + size);
        return size;
    }
}