/*
* Copyright 2007 - 2017 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.sf.jailer;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* Compares XML-documents.
*
* @author Ralf Wisser
*/
public class XmlDocumentComparator {
/**
* Checks whether all information in a xml-document
* can also be found in another document.
*
* @param part first xml-document
* @param doc second xml-document
* @return <code>true</code> iff all information in <code>part</code>
* can also be found in <code>doc</code>
*/
static public boolean contains(Document part, Document doc) {
Collection<Element> remainingLeafs = new ArrayList<Element>();
boolean c = contains(part.getDocumentElement(), doc.getDocumentElement(), remainingLeafs);
dumpLeafs("remaining leafs", remainingLeafs);
return c;
}
/**
* Checks whether all information in a xml-document
* can also be found in another document and vice versa.
*
* @param a first xml-document
* @param b second xml-document
* @return <code>true</code> iff all information in <code>a</code>
* can also be found in <code>b</code> and vice versa
*/
static public boolean equals(Document a, Document b) {
Collection<Element> remainingLeafsA = new ArrayList<Element>();
Collection<Element> remainingLeafsB = new ArrayList<Element>();
boolean c = contains(a.getDocumentElement(), b.getDocumentElement(), remainingLeafsA)
&& contains(b.getDocumentElement(), a.getDocumentElement(), remainingLeafsB);
dumpLeafs("remaining leafs", remainingLeafsA);
dumpLeafs("remaining leafs", remainingLeafsB);
return c;
}
/**
* Parses a XML-file.
*
* @param xmlFile the XML file
* @return the DOM-tree
*/
public static Document parseXmlDocument(File xmlFile) throws ParserConfigurationException, FileNotFoundException, SAXException, IOException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(new InputSource(new FileReader(xmlFile)));
return document;
}
/**
* Checks whether all information in a xml-element-tree
* can also be found in another tree.
*
* @param part first xml-element-tree
* @param doc second xml-element-tree
* @param remainingLeafs collection to put in all leafs from part which cannot be found in doc
* @return <code>true</code> iff all information in <code>part</code>
* can also be found in <code>doc</code>
*/
static public boolean contains(Element part, Element doc, Collection<Element> remainingLeafs) {
if (!part.getTagName().equals(doc.getTagName())) {
remainingLeafs.addAll(leafs(part));
return false;
}
if (part.getChildNodes().getLength() == 0) {
String partText = part.getTextContent() == null? "" : part.getTextContent().trim();
String docText = doc.getTextContent() == null? "" : doc.getTextContent().trim();
if (!partText.equals(docText)) {
remainingLeafs.add(part);
return false;
}
}
// TODO: check attributes too
boolean docContainsPart = true;
for (int i = 0; i < part.getChildNodes().getLength(); ++i) {
if (!(part.getChildNodes().item(i) instanceof Element)) continue;
Element partChild = (Element) part.getChildNodes().item(i);
// search child in doc, child-order is not relevant
Collection<Element> remainingLeafsOfAllChildren = null;
boolean partChildFoundInDoc = false;
for (int j = 0; j < doc.getChildNodes().getLength(); ++j) {
if (!(doc.getChildNodes().item(j) instanceof Element)) continue;
Element docChild = (Element) doc.getChildNodes().item(j);
Collection<Element> remainingLeafsOfChild = new ArrayList<Element>();
if (contains(partChild, docChild, remainingLeafsOfChild)) {
partChildFoundInDoc = true;
break;
}
if (remainingLeafsOfAllChildren == null) {
remainingLeafsOfAllChildren = remainingLeafsOfChild;
} else {
remainingLeafsOfAllChildren.retainAll(remainingLeafsOfChild);
}
}
if (!partChildFoundInDoc) {
if (remainingLeafsOfAllChildren != null) {
remainingLeafs.addAll(remainingLeafsOfAllChildren);
} else {
System.out.print("");
}
docContainsPart = false;
}
}
return docContainsPart;
}
/**
* Collects all leafs of a element-tree.
*
* @param root the root of the tree
* @return all leafs of the element-tree
*/
private static Collection<Element> leafs(Element root) {
Collection<Element> leafs = new ArrayList<Element>();
boolean isLeaf = true;
for (int i = 0; i < root.getChildNodes().getLength(); ++i) {
if (root.getChildNodes().item(i) instanceof Element) {
Element child = (Element) root.getChildNodes().item(i);
leafs.addAll(leafs(child));
isLeaf = false;
}
}
if (isLeaf) {
leafs.add(root);
}
return leafs;
}
/**
* Dumps a set of leafs.
*
* @param title the title
* @param leafs the set of leafs
*/
private static void dumpLeafs(String title, Collection<Element> leafs) {
if (leafs.isEmpty()) {
return;
}
System.out.println(title + ":");
for (Iterator<Element> i = leafs.iterator(); i.hasNext(); ) {
Element e = i.next();
String path = "";
for (Node n = e; n != null; n = n.getParentNode()) {
path = n.getNodeName() + "/" + path;
}
System.out.println(path + ":" + e.getTextContent());
}
}
}