package org.exist.versioning;
import bmsi.util.Diff;
import org.apache.log4j.Logger;
import org.exist.dom.DocumentImpl;
import org.exist.dom.NodeProxy;
import org.exist.dom.QName;
import org.exist.numbering.NodeId;
import org.exist.stax.EmbeddedXMLStreamReader;
import org.exist.storage.DBBroker;
import org.exist.util.serializer.Receiver;
import org.exist.util.serializer.SAXSerializer;
import org.exist.util.serializer.SerializerPool;
import org.xml.sax.SAXException;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.OutputKeys;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Map;
import java.util.TreeMap;
import java.util.Iterator;
/**
 * Computes the differences between two stored documents and serializes them
 * as a <code>v:diff</code> element.
 *
 * Both documents are flattened into arrays of {@link DiffNode}s (see
 * {@link #getNodes(DBBroker, DocumentImpl)}), the arrays are compared with
 * {@link bmsi.util.Diff}, and the resulting edit script is translated into a
 * list of {@link Difference} objects (see
 * {@link #getChanges(Diff.change, DocumentImpl, DocumentImpl, DiffNode[], DiffNode[])}).
 *
 * {@link #diff(DocumentImpl, DocumentImpl)} must be called before either
 * <code>diff2XML</code> variant: the list of changes is <code>null</code>
 * until then.
 */
public class StandardDiff implements org.exist.versioning.Diff {

    private final static Logger LOG = Logger.getLogger(StandardDiff.class);

    public final static String NAMESPACE = "http://exist-db.org/versioning";
    public final static String PREFIX = "v";

    private final static QName DIFF_ELEMENT = new QName("diff", NAMESPACE, PREFIX);

    private DBBroker broker;

    /** Result of the last call to diff(); null until diff() has been called. */
    private List changes = null;

    /**
     * @param broker the broker used to read the documents and to serialize
     *               the differences
     */
    public StandardDiff(DBBroker broker) {
        this.broker = broker;
    }

    /**
     * Compares the two documents and stores the resulting list of changes in
     * this object. Use one of the <code>diff2XML</code> methods afterwards to
     * serialize them.
     *
     * @param docA the original document
     * @param docB the modified document
     * @throws DiffException if reading either document fails; the original
     *                       exception is attached as cause
     */
    public void diff(DocumentImpl docA, DocumentImpl docB)
            throws DiffException {
        try {
            DiffNode[] nodesA = getNodes(broker, docA);
            DiffNode[] nodesB = getNodes(broker, docB);

            if (LOG.isTraceEnabled()) {
                LOG.trace("Source:");
                debugNodes(nodesA);
                LOG.trace("Modified:");
                debugNodes(nodesB);
            }

            Diff diff = new Diff(nodesA, nodesB);
            Diff.change script = diff.diff_2(false);
            changes = getChanges(script, docA, docB, nodesA, nodesB);
        } catch (XMLStreamException e) {
            throw new DiffException(e.getMessage(), e);
        } catch (IOException e) {
            throw new DiffException(e.getMessage(), e);
        }
    }

    /**
     * Writes all nodes of the array, one per line and prefixed with their
     * index, to the trace log.
     */
    private void debugNodes(DiffNode[] nodes) {
        StringBuffer buf = new StringBuffer();
        buf.append('\n');
        for (int i = 0; i < nodes.length; i++) {
            DiffNode node = nodes[i];
            buf.append(Integer.toString(i)).append(' ').append(node.toString()).append('\n');
        }
        LOG.trace(buf.toString());
    }

    /**
     * Serializes the changes computed by the last call to
     * {@link #diff(DocumentImpl, DocumentImpl)} into a string.
     *
     * @return the serialized diff document
     * @throws DiffException if serialization fails
     */
    public String diff2XML() throws DiffException {
        SAXSerializer sax = null;
        try {
            StringWriter writer = new StringWriter();
            sax = (SAXSerializer) SerializerPool.getInstance().borrowObject(
                    SAXSerializer.class);
            Properties outputProperties = new Properties();
            outputProperties.setProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
            outputProperties.setProperty(OutputKeys.INDENT, "no");
            sax.setOutput(writer, outputProperties);
            sax.startDocument();
            diff2XML(sax);
            sax.endDocument();
            return writer.toString();
        } catch (SAXException e) {
            throw new DiffException("error while serializing diff: " + e.getMessage(), e);
        } finally {
            // A borrowed serializer has to go back to the pool on every path,
            // otherwise each call leaks one pooled instance.
            if (sax != null) {
                SerializerPool.getInstance().returnObject(sax);
            }
        }
    }

    /**
     * Serializes the changes computed by the last call to
     * {@link #diff(DocumentImpl, DocumentImpl)} to the given receiver, wrapped
     * into a <code>v:diff</code> element.
     *
     * Throws a NullPointerException if diff() has not been called before,
     * because the list of changes is still null.
     *
     * @param receiver the receiver to write to
     * @throws DiffException if the receiver reports a SAX error
     */
    public void diff2XML(Receiver receiver ) throws DiffException {
        try {
            receiver.startElement(DIFF_ELEMENT, null);
            for (int i = 0; i < changes.size(); i++) {
                Difference diff = (Difference) changes.get(i);
                diff.serialize(broker, receiver);
            }
            receiver.endElement(DIFF_ELEMENT);
            // NOTE(review): endDocument() is called here although this method
            // never calls startDocument(); the no-argument diff2XML() therefore
            // ends the document twice. Left as is because external callers may
            // depend on the document being ended here - confirm before removing.
            receiver.endDocument();
        } catch (SAXException e) {
            throw new DiffException("error while serializing diff: " + e.getMessage(), e);
        }
    }

    /**
     * Translates the edit script into a list of {@link Difference} objects.
     *
     * Deletions and appends are added to the result in the order in which
     * they occur in the script. Plain inserts are collected per reference
     * node in a sorted map and added after all other changes.
     *
     * @param script first entry of the edit script; may be null, in which
     *               case the returned list is empty
     * @param docA   the original document
     * @param docB   the modified document
     * @param nodesA the flattened nodes of docA
     * @param nodesB the flattened nodes of docB
     * @return the list of {@link Difference} objects
     * @throws XMLStreamException declared for subclasses and callers; not
     *                            thrown by the code visible here
     */
    protected List getChanges(Diff.change script, DocumentImpl docA, DocumentImpl docB, DiffNode[] nodesA, DiffNode[] nodesB) throws XMLStreamException {
        List changes = new ArrayList();
        Map inserts = new TreeMap();
        Diff.change next = script;
        while (next != null) {
            // start0/lastDeleted index into nodesA, start/last into nodesB
            int start0 = next.line0;
            int start = next.line1;
            int last = start + next.inserted;
            int lastDeleted = start0 + next.deleted;

            if (next.inserted > 0) {
                if (next.deleted == 0) {
                    // Simplify edit script: if there's a set of start tags at the end of the
                    // insertion, check if they correspond to similar start tags *before* the
                    // inserted section. If yes, move the inserted range to match the entire
                    // inserted element instead of a sequence of end/start tags.
                    int offsetFix = 0;
                    for (int i = last - 1; i > start; i--) {
                        DiffNode node = nodesB[i];
                        if (node.nodeType == XMLStreamReader.START_ELEMENT && start - (last - i) > 0) {
                            DiffNode before = nodesB[start - (last - i)];
                            if (before.nodeType == XMLStreamReader.START_ELEMENT &&
                                    before.qname.equals(node.qname))
                                offsetFix++;
                        } else
                            break;
                    }
                    if (offsetFix > 0) {
                        start = start - offsetFix;
                        start0 = start0 - offsetFix;
                        last = start + next.inserted;
                    }
                }

                // NOTE(review): nodesA[start0] is read without a bounds check, i.e.
                // this assumes the insertion point lies inside nodesA - confirm
                // what should happen for an insertion after the last node of docA.
                Difference.Insert diff;
                if (nodesA[start0].nodeType == XMLStreamReader.END_ELEMENT) {
                    // insertion point is an end tag: append to that element
                    diff = new Difference.Append(new NodeProxy(docA, nodesA[start0].nodeId), docB);
                    changes.add(diff);
                } else {
                    // reuse an existing insert for the same reference node
                    diff = (Difference.Insert) inserts.get(nodesA[start0].nodeId);
                    if (diff == null) {
                        diff = new Difference.Insert(new NodeProxy(docA, nodesA[start0].nodeId), docB);
                        inserts.put(nodesA[start0].nodeId, diff);
                    }
                }

                // now scan the chunk and collect the nodes
                DiffNode[] nodes = new DiffNode[last - start];
                int j = 0;
                for (int i = start; i < last; i++, j++) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(Integer.toString(i) + " " + nodesB[i]);
                    nodes[j] = nodesB[i];
                }
                diff.addNodes(nodes);
            }

            if (next.deleted > 0) {
                // This is a simple test to correct an issue when two nodes of the same
                // node-name are siblings and the first is deleted. What happens is that the
                // first element doesn't get its start node deleted and the second does. So
                // the second element basically ends up with the first one's start element,
                // which causes problems for the second element's attributes.
                //
                // The test looks at the node in front of the deleted range, so it can only
                // be applied if there is such a node: a deletion starting at the very first
                // node (start0 == 0) would otherwise read nodesA[-1].
                if (start0 > 0) {
                    DiffNode beforeElement = nodesA[start0 - 1];
                    DiffNode lastElement = nodesA[lastDeleted - 1];
                    if (beforeElement.qname != null
                            && lastElement.qname != null
                            && beforeElement.qname.equals(lastElement.qname)
                            && beforeElement.nodeType == XMLStreamReader.START_ELEMENT
                            && lastElement.nodeType == XMLStreamReader.START_ELEMENT) {
                        // shift the deleted range one node to the left
                        start0--;
                        lastDeleted--;
                    }
                }

                if (LOG.isTraceEnabled())
                    LOG.trace("Deleted: " + start0 + " last: " + lastDeleted);

                for (int i = start0; i < lastDeleted; i++) {
                    boolean elementDeleted = false;
                    if (nodesA[i].nodeType == XMLStreamReader.START_ELEMENT) {
                        // If the matching end tag is part of the deleted range as well,
                        // the whole element is deleted at once and its content is skipped.
                        for (int j = i; j < lastDeleted; j++) {
                            if (nodesA[j].nodeType == XMLStreamReader.END_ELEMENT &&
                                    nodesA[j].nodeId.equals(nodesA[i].nodeId)) {
                                Difference.Delete diff = new Difference.Delete(new NodeProxy(docA, nodesA[i].nodeId));
                                changes.add(diff);
                                i = j;
                                elementDeleted = true;
                                break;
                            }
                        }
                    }
                    if (!elementDeleted) {
                        // single node (or a tag without its counterpart in the range)
                        Difference.Delete diff = new Difference.Delete(nodesA[i].nodeType, new NodeProxy(docA, nodesA[i].nodeId));
                        changes.add(diff);
                    }
                }
            }
            next = next.link;
        }

        // collected inserts go last, in the order of the map's keys
        for (Iterator i = inserts.values().iterator(); i.hasNext();) {
            changes.add(i.next());
        }
        return changes;
    }

    /**
     * Flattens a document into an array of {@link DiffNode}s by streaming
     * over it.
     *
     * Start and end tags become separate nodes carrying the element's QName.
     * Each attribute becomes a node of its own, placed directly after the
     * start tag of its element, with the string <code>qname=value</code> as
     * value. Text and comments carry their text, processing instructions the
     * string <code>target data</code>. All other stream events are ignored.
     *
     * @param broker the broker used to create the stream reader
     * @param root   the document to read
     * @return the nodes of the document in stream order
     * @throws XMLStreamException if streaming over the document fails
     * @throws IOException        if reading the document fails
     */
    protected DiffNode[] getNodes(DBBroker broker, DocumentImpl root) throws XMLStreamException, IOException {
        EmbeddedXMLStreamReader reader = broker.newXMLStreamReader(new NodeProxy(root, NodeId.DOCUMENT_NODE, root.getFirstChildAddress()), false);
        List nodes = new ArrayList();
        DiffNode node;
        while (reader.hasNext()) {
            int status = reader.next();
            NodeId nodeId = (NodeId) reader.getProperty(EmbeddedXMLStreamReader.PROPERTY_NODE_ID);
            switch (status) {
                case XMLStreamReader.START_ELEMENT:
                    node = new DiffNode(nodeId, status, reader.getQName());
                    nodes.add(node);
                    // attributes are not reported as events, so add them here
                    for (int i = 0; i < reader.getAttributeCount(); i++) {
                        nodeId = reader.getAttributeId(i);
                        String value = reader.getAttributeQName(i).getStringValue() + '=' +
                                reader.getAttributeValue(i);
                        node = new DiffNode(nodeId, XMLStreamReader.ATTRIBUTE, value);
                        nodes.add(node);
                    }
                    break;
                case XMLStreamReader.END_ELEMENT:
                    node = new DiffNode(nodeId, status, reader.getQName());
                    nodes.add(node);
                    break;
                case XMLStreamReader.CHARACTERS:
                case XMLStreamReader.COMMENT:
                    node = new DiffNode(nodeId, status, reader.getText());
                    nodes.add(node);
                    break;
                case XMLStreamReader.PROCESSING_INSTRUCTION:
                    String value = reader.getPITarget() + " " + reader.getPIData();
                    nodes.add(new DiffNode(nodeId, status, value));
                    break;
            }
        }
        DiffNode[] array = new DiffNode[nodes.size()];
        return (DiffNode[]) nodes.toArray(array);
    }
}