/*
* Copyright 2007 Edward Kuns
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $Id: XMLDiff.java 0000 2007-01-11 00:00:00Z ekuns $
*/
package org.castor.xmlctf.xmldiff;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.StringTokenizer;
import org.castor.xmlctf.xmldiff.xml.XMLFileReader;
import org.castor.xmlctf.xmldiff.xml.nodes.Attribute;
import org.castor.xmlctf.xmldiff.xml.nodes.Element;
import org.castor.xmlctf.xmldiff.xml.nodes.ParentNode;
import org.castor.xmlctf.xmldiff.xml.nodes.Root;
import org.castor.xmlctf.xmldiff.xml.nodes.XMLNode;
/**
* A utility class used to compare two XMLNodes, or XML input files and report
* the differences between them.
*
* @author <a href="mailto:edward.kuns@aspect.com">Edward Kuns</a>
* @version $Revision: 0000 $ $Date: 2007-01-11 00:00:00 -0600 (Thu, 11 Jan 2007) $
* @since Castor 1.1
*/
public class XMLDiff {
/** The namespace of XML Schema. */
private static final String XMLSCHEMA_INSTANCE = "http://www.w3.org/2001/XMLSchema-instance";
/** Filename of the 1st XML item being compared. */
private final String _file1;
/** Filename of the 2nd XML item being compared. */
private final String _file2;
/** PrintWriter used for this comparison. */
private final PrintWriter _pw = new PrintWriter(System.out, true);
/**
* A boolean to indicate whether or not child order should be strictly
* enforced. By default child order is treated as "all", meaning that as
* long as a child exists, it doesn't matter which order the children occur.
*/
private boolean _strictChildOrder = false;
/** True if we should print errors to the PrintWriter. */
private boolean _print = true;
/** True if we have not yet printed the diff header. */
private boolean _header = true;
/**
* Constructs an XMLDiff object that is ready to compare the two provided
* XML files.
*
* @param file1 The first XML file for comparison.
* @param file2 The second XML file for comparison.
*/
public XMLDiff(final String file1, final String file2) {
if (file1 == null) {
String err = "The argument 'file1' may not be null.";
throw new IllegalArgumentException(err);
}
if (file2 == null) {
String err = "The argument 'file2' may not be null.";
throw new IllegalArgumentException(err);
}
_file1 = file1;
_file2 = file2;
}
/**
* Compares the two XML documents located at the given URL locations.
* Returns 0, if no differences are found, otherwise returns a positive
* number indicating the number of differences.
* <p>
* This is the only public method in this class.
*
* @return 0, if no differences are found, otherwise a positive number
* indicating the number of differences.
* @throws java.io.IOException an this occurs while reading
*/
public int compare() throws java.io.IOException {
XMLFileReader reader1 = new XMLFileReader(_file1);
XMLNode node1 = reader1.read();
XMLFileReader reader2 = new XMLFileReader(_file2);
XMLNode node2 = reader2.read();
return compareNodes(node1, node2);
}
/**
* Compares the given XMLNodes. Returns 0 if no differences are found,
* otherwise returns a positive number indicating the number of differences.
*
* @param node1 the first XMLNode to compare
* @param node2 the second XMLNode to compare
* @return 0, if no differences are found, otherwise a positive number
* indicating the number of differences.
*/
private int compareNodes(final XMLNode node1, final XMLNode node2) {
// Compare node types
if (!hasSameType(node1, node2)) {
if (_print) {
_pw.println("Types differ: <" + node1.getLocalName() + "> and <"
+ node2.getLocalName() + "> for" + node1.getNodeLocation());
}
return 1;
}
int diffCount = 0;
String ns1 = node1.getNamespaceURI();
String ns2 = node2.getNamespaceURI();
if (!compareTextNullEqualsEmpty(ns1, ns2)) {
if (_print) {
_pw.println("Namespaces differ: ('" + ns1 + "' != '" + ns2 + "') for "
+ node1.getNodeLocation());
}
++diffCount;
}
// Compare local names (if both are null it's OK)
String name1 = node1.getLocalName();
String name2 = node2.getLocalName();
if (name1 == null && name2 != null) {
if (_print) {
_pw.println("Names differ: null vs. <" + name2 + "> for "
+ node1.getNodeLocation());
}
++diffCount;
return diffCount;
} else if (name2 == null && name1 != null) {
if (_print) {
_pw.println("Names differ: <" + name1 + "> vs null for "
+ node1.getNodeLocation());
}
++diffCount;
return diffCount;
} else if (name1 != null && !name1.equals(name2)) {
if (_print) {
_pw.println("Names differ: <" + name1 + "> != <" + name2 + "> for "
+ node1.getNodeLocation());
}
++diffCount;
return diffCount;
}
// Compare node content
switch (node1.getNodeType()) {
case XMLNode.ROOT:
diffCount += compareElementsStrictOrder((Root)node1, (Root)node2);
break;
case XMLNode.ELEMENT:
diffCount += compareElements((Element)node1, (Element)node2);
break;
case XMLNode.ATTRIBUTE:
diffCount += compareStringValues(node1, node2);
break;
case XMLNode.TEXT:
diffCount += compareStringValues(node1, node2);
break;
case XMLNode.PROCESSING_INSTRUCTION:
// We don't care about comparing processing instructions
break;
default:
System.out.println("Unexpected node type in XMLDiff: " + node1.getNodeType());
break;
}
return diffCount;
} // -- compare
/**
* Compares the String values of the provided XML Nodes.
* @param node1 The first node to be compared
* @param node2 The second node to be compared
* @return 0 if the String values are the same, 1 otherwise
*/
private int compareStringValues(final XMLNode node1, final XMLNode node2) {
if (compareText(node1.getStringValue(), node2.getStringValue())) {
return 0;
}
if (_print) {
_pw.println();
printLocationInfo(node1, node2);
printText("- ", node1.getStringValue());
_pw.println();
printText("+ ", node2.getStringValue());
}
return 1;
}
/**
* Checks the attributes of the given nodes to make sure they are identical
* but does not care about the attribute order, as per XML 1.0.
*/
private int compareAttributes(final Element node1, final Element node2) {
int diffCount = 0;
for (Iterator i = node1.getAttributeIterator(); i.hasNext(); ) {
Attribute attr1 = (Attribute) i.next();
// Does node2 have this attribute at all?
String attValue2 = node2.getAttribute(attr1.getNamespaceURI(), attr1.getLocalName());
if (attValue2 == null) {
// Is this an attribute that is allowed to be missing sometimes?
if (missingattributeIsIgnorable(attr1)) {
continue;
}
// If not, complain
printElementChangeBlock(node1, node2, "Attribute '"
+ attr1.getNodeLocation()
+ "' does not exist in the second document.");
diffCount++;
continue;
}
// If it does, does it have the same value?
String attValue1 = attr1.getStringValue();
if (!compareTextLikeQName(node1, node2, attValue1, attValue2)) {
printElementChangeBlock(node1, node2, "Attribute '"
+ attr1.getNodeLocation()
+ "' values are different.");
diffCount++;
}
}
// Look for attributes on node 2 that are not on node 1
for (Iterator i = node2.getAttributeIterator(); i.hasNext(); ) {
Attribute attr2 = (Attribute) i.next();
if (node1.getAttribute(attr2.getNamespaceURI(), attr2.getLocalName()) == null) {
// Is this an attribute that is allowed to be missing sometimes?
if (missingattributeIsIgnorable(attr2)) {
continue;
}
// If not, complain
printElementChangeBlock(node1, node2, "Attribute '"
+ attr2.getNodeLocation()
+ "' does not exist in the first document.");
diffCount++;
}
}
return diffCount;
}
private boolean missingattributeIsIgnorable(Attribute attr) {
String name = attr.getLocalName();
String ns = attr.getNamespaceURI();
if (ns == null) {
ns = "";
}
if (name.equals("noNamespaceSchemaLocation") && ns.equals(XMLSCHEMA_INSTANCE)) {
return true;
}
if (name.equals("schemaLocation") && ns.equals(XMLSCHEMA_INSTANCE)) {
return true;
}
return false;
}
/**
* Compare the provided attribute text as if it were a QName.
*
* @param node1 Node containing attribute 1
* @param node2 Node containing attribute 2
* @param attValue1 String value of attribute 1
* @param attValue2 String value of attribute 2
* @return true if the attributes are equal directly, or equal when compared
* as QNames
*/
private boolean compareTextLikeQName(final XMLNode node1, final XMLNode node2,
final String attValue1, final String attValue2) {
// If strings are equal, return equal
if (compareText(attValue1, attValue2)) {
return true;
}
// If neither attribute value has ":" then return false
final int idx1 = attValue1.indexOf(':');
final int idx2 = attValue2.indexOf(':');
if (idx1 < 0 && idx2 < 0) {
return false;
}
final String prefix1;
final String prefix2;
final String value1;
final String value2;
if (idx1 >= 0) {
value1 = attValue1.substring(idx1 + 1);
prefix1 = attValue1.substring(0, idx1);
} else {
value1 = attValue1;
prefix1 = "";
}
if (idx2 >= 0) {
value2 = attValue2.substring(idx2 + 1);
prefix2 = attValue2.substring(0, idx2);
} else {
value2 = attValue2;
prefix2 = "";
}
// Return true if text value is equal and namesspaces are equal
return compareText(value1, value2)
&& compareTextNullEqualsEmpty(node1.getNamespaceURI(prefix1),
node2.getNamespaceURI(prefix2));
}
/**
* Compares the two XMLNodes, both of which must be of type XMLNode.ELEMENT
* or XMLNode.ROOT.
*
* @param node1 the primary XMLNode to comapare against
* @param node2 the XMLNode to compare against node1
* @return the number of differences found in this document tree
*/
private int compareElements(final Element node1, final Element node2) {
int diffCount = compareAttributes(node1, node2);
if (_strictChildOrder) {
diffCount += compareElementsStrictOrder(node1, node2);
} else {
diffCount += compareElementsLooseOrder(node1, node2);
}
return diffCount;
}
/**
* Compares the two XMLNodes (not counting attributes) requiring strict
* child order, both of which must be of type XMLNode.ELEMENT or
* XMLNode.ROOT.
*
* @param node1 the primary XMLNode to comapare against
* @param node2 the XMLNode to compare against node1
* @return the number of differences found in this document tree
*/
private int compareElementsStrictOrder(final ParentNode node1, final ParentNode node2) {
int diffCount = 0;
Iterator i1 = node1.getChildIterator();
Iterator i2 = node2.getChildIterator();
// Skip all ignorable whitespace and compare with strict order
if (i1.hasNext() && i2.hasNext()) {
XMLNode child1 = (XMLNode) i1.next();
XMLNode child2 = (XMLNode) i2.next();
while (child1 != null && child2 != null) {
if (nodeIsIgnorableText(child1)) {
if (!i1.hasNext()) {
break;
}
child1 = (XMLNode) i1.next();
continue;
}
if (nodeIsIgnorableText(child2)) {
if (!i2.hasNext()) {
break;
}
child2 = (XMLNode) i2.next();
continue;
}
diffCount += compareNodes(child1, child2);
if (!i1.hasNext() || !i2.hasNext()) {
break;
}
child1 = (XMLNode) i1.next();
child2 = (XMLNode) i2.next();
}
}
// If we have excess nodes for root1, complain about missing elements
while (i1.hasNext()) {
XMLNode child1 = (XMLNode) i1.next();
if (!nodeIsIgnorableText(child1)) {
if (_print) {
printLocationInfo(child1, null);
_pw.println("- ");
}
++diffCount;
}
}
// If we have excess nodes for root2, complain about extra elements
while (i2.hasNext()) {
XMLNode child2 = (XMLNode) i2.next();
if (!nodeIsIgnorableText(child2)) {
if (_print) {
printLocationInfo(child2, null);
_pw.println("- ");
}
++diffCount;
}
}
return diffCount;
}
/**
* Compares the two XMLNodes, both of which must be of type XMLNode.ELEMENT
* or XMLNode.ROOT.
*
* @param node1 the primary XMLNode to comapare against
* @param node2 the XMLNode to compare against node1
* @return the number of differences found in this document tree
*/
private int compareElementsLooseOrder(final Element node1, final Element node2) {
int diffCount = 0;
final List used = new LinkedList();
for (Iterator i1 = node1.getChildIterator(); i1.hasNext(); ) {
XMLNode child1 = (XMLNode) i1.next();
// Ignore whitespace
// If we find an exact match, continue with the next node in the list
if (nodeIsIgnorableText(child1) || foundExactMatch(node2, child1, used)) {
continue;
}
// Check for the best match and use it to count diffs & complain
if (_print) {
diffCount += closestMatchDifference(node2, child1, used);
} else {
diffCount++;
}
}
// Complain about all children of node2 that are not used and not ignorable whitespace
for (Iterator i2 = node2.getChildIterator(); i2.hasNext(); ) {
XMLNode child2 = (XMLNode) i2.next();
if (!nodeIsIgnorableText(child2) && !used.contains(child2)) {
if (_print) {
_pw.println("Extra child node: " + child2.getNodeLocation());
}
++diffCount;
}
}
return diffCount;
}
/**
* Looks for an exact match for the provided target XMLNode. If found,
* returns true. Suppresses complaints during search. If an exact match is
* found, the match is added to the list of "used" items.
*
* @param parent The node whose children to search for an exact match
* @param target The XMLNode we are trying to match
* @param usedList The list of children of node2 that have already matched
* other objects
* @return true if an exact match is found for the provided node.
*/
private boolean foundExactMatch(final Element parent, XMLNode target, final List usedList) {
// Suppress complaints when we are looking for an exact match.
boolean previousPrint = _print;
_print = false; // Suppress printing when we are "just looking"
boolean found = false;
for (Iterator i2 = parent.getChildIterator(); i2.hasNext(); ) {
XMLNode child2 = (XMLNode) i2.next();
if (!usedList.contains(child2) && compareNodes(target, child2) == 0) {
usedList.add(child2);
found = true;
break;
}
}
// Restore printing
_print = previousPrint;
return found;
}
/**
* Looks for a close patch to the provided target XMLNode among the children
* of the provided parent node. The difference between the closest match
* and the target is returned. If we cannot even find a close match, then
* we declare the target missing and return one difference.
* <p>
* Note: This method is only called when printing is enabled.
*
* @param parent The node whose children to search for an exact match
* @param target The XMLNode we are trying to match
* @param usedList The list of children of node2 that have already matched
* other objects
* @return the difference count
*/
private int closestMatchDifference(final Element parent, final XMLNode target,
final List usedList) {
for (Iterator i2 = parent.getChildIterator(); i2.hasNext(); ) {
XMLNode child2 = (XMLNode) i2.next();
if (!usedList.contains(child2) && hasSameType(target, child2)
&& hasSameName(target, child2)) {
usedList.add(child2);
return compareNodes(target, child2);
}
}
_pw.println("Missing child node: " + target.getNodeLocation() + " for "
+ target.getNodeLocation());
return 1;
}
/**
* Returns true if the given node is a TEXT node that contains only
* ignorable whitespace.
*
* @return true if the given node is a TEXT node that contains only
* ignorable whitespace.
*/
private boolean nodeIsIgnorableText(final XMLNode child) {
return (child.getNodeType() == XMLNode.TEXT && compareText(child.getStringValue(), ""));
}
/**
* Returns true if the two Strings are equal, ignoring whitespace
* differences that are ignorable.
*
* @return true if the two Strings are equal, ignoring whitespace
* differences that are ignorable.
*/
private boolean compareText(final String s1, final String s2) {
if (s1.equals(s2)) {
return true;
}
// Strings are different; compare token by token to ignore whitespace differences
StringTokenizer st1 = new StringTokenizer(s1);
StringTokenizer st2 = new StringTokenizer(s2);
while (st1.hasMoreTokens() && st2.hasMoreTokens()) {
if (!st1.nextToken().equals(st2.nextToken())) {
return false;
}
}
// If the Strings have different numbers of tokens, fail
if (st1.hasMoreTokens() || st2.hasMoreTokens()) {
return false;
}
return true;
}
/**
* Compares two strings. Considers null Strings to be the same as an empty
* String.
*
* @param one The first string to compare.
* @param two The second string to compare.
* @return true if the two strings are equals or are both "null or empty"
*/
private boolean compareTextNullEqualsEmpty(String one, String two) {
String text1 = (one == null) ? "" : one;
String text2 = (two == null) ? "" : two;
return text1.equals(text2);
}
private boolean hasSameName(final XMLNode node1, final XMLNode node2) {
String name1 = node1.getLocalName();
String name2 = node2.getLocalName();
// ROOT may or may not have null name, so we must check for possible null values
if (name1 == null) {
return (name2 == null);
}
return name1.equals(name2);
}
private boolean hasSameType(final XMLNode node1, final XMLNode node2) {
return (node1.getNodeType() == node2.getNodeType());
}
private void printLocationInfo(final XMLNode node1, final XMLNode node2) {
if (_header) {
_header = false;
_pw.println("--- " + _file1);
_pw.println("+++ " + _file2);
}
_pw.print("@@ -");
_pw.print(node1.getNodeLocation());
_pw.print(" +");
_pw.print(node2.getNodeLocation());
_pw.println(" @@");
}
private void printElementChangeBlock(final Element node1, final Element node2, final String msg) {
if (_print) {
_pw.print("- ");
printElement(node1);
_pw.print("+ ");
printElement(node2);
if (msg != null) {
_pw.println(msg);
}
}
}
private void printElement(final Element node) {
_pw.print('<' + node.getLocalName());
for (Iterator i = node.getAttributeIterator(); i.hasNext(); ) {
Attribute attr = (Attribute) i.next();
_pw.print(' ');
_pw.print(attr.getLocalName());
_pw.print("=\"");
_pw.print(attr.getStringValue());
_pw.print("\"");
}
_pw.println('>');
}
/**
* Prints the given text. Each line of the text is prefixed with the given
* prefix. If <code>text</code> has multiple newlines, the prefix will be
* printed on each line.
*
* @param prefix A prefix to display on each line of output
* @param text The text to display
*/
private void printText(final String prefix, String text) {
if (text == null) {
_pw.println(prefix);
return;
}
int idx = 0;
while ((idx = text.indexOf('\n')) >= 0) {
_pw.print(prefix);
_pw.println(text.substring(0, idx));
text = text.substring(idx + 1);
}
_pw.print(prefix);
_pw.println(text);
}
}