package org.nextprot.api.commons.utils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.w3c.dom.Node;
import difflib.Delta;
import difflib.DiffUtils;
import difflib.Patch;
/**
 * Compares two XML DOM nodes by flattening each of them into a list of text lines and
 * diffing the two lists.
 *
 * <p>Every line produced by {@link #buildNodeTree2} has the form
 * {@code LL|<path>-<name>[=<value>][-attr-<name>=<value>...]} where {@code LL} is the depth of
 * the node (1 for the node passed in, zero-padded to at least two digits), {@code <path>} is the
 * dash-joined chain of ancestor names, and names and values are lower-cased.
 */
public class XmlComparator {

    /** Character separating the depth prefix from the rest of a generated line. */
    private static final char LEVEL_SEPARATOR = '|';

    /** Value of the "last ignored level" marker while no line is being ignored. */
    private static final int NO_IGNORED_LEVEL = 1000;

    /** Title line inserted at index 0 of the list built for the first node of a comparison. */
    private static final String ORIGINAL_TITLE = "n1-original";

    /** Title line inserted at index 0 of the list built for the second node of a comparison. */
    private static final String REVISED_TITLE = "n2-revised";

    /**
     * Extracts the depth encoded at the beginning of a line generated by {@link #buildNodeTree2}.
     *
     * <p>When the line starts with two or more digits immediately followed by {@code '|'}, the
     * whole digit run is parsed, so depths of 100 and more are read correctly. For any other
     * input the first two characters are parsed, as before.
     *
     * @param line a line starting with a numeric depth prefix
     * @return the depth of the line
     * @throws NumberFormatException if the prefix is not a number
     * @throws StringIndexOutOfBoundsException if the fallback applies and the line is shorter than 2 characters
     */
    public static int getLineLevel(String line) {
        int digits = 0;
        while (digits < line.length() && line.charAt(digits) >= '0' && line.charAt(digits) <= '9') {
            digits++;
        }
        boolean fullPrefix = digits >= 2 && digits < line.length() && line.charAt(digits) == LEVEL_SEPARATOR;
        String prefix = fullPrefix ? line.substring(0, digits) : line.substring(0, 2);
        return Integer.parseInt(prefix);
    }

    /**
     * Same as the 8-argument overload with {@code ignoreChildrenLinesAsWell} set to {@code false}.
     *
     * @param n the XML node to represent as a List of String
     * @param parent title inserted as the first line of the returned list and used as prefix when the list is printed
     * @param linesToIgnore pipe separated list of patterns; a line containing one of them is left out (may be null)
     * @param sortList sorts the lines alphabetically when {@code sortUnique} is false
     * @param showList prints the resulting list to System.out
     * @param sortUnique sorts the lines and removes duplicates; takes precedence over {@code sortList}
     * @param singleAttrByLine generates one extra line per attribute instead of appending attributes to the element line
     * @return the list of lines representing the node branch, preceded by the title line
     */
    public static List<String> getNodeTreeAsList2(Node n, String parent, String linesToIgnore, boolean sortList, boolean showList, boolean sortUnique, boolean singleAttrByLine) {
        return getNodeTreeAsList2(n, parent, linesToIgnore, sortList, showList, sortUnique, singleAttrByLine, false);
    }

    /**
     * Returns a list of String representing the XML Node n and its sub-components (attributes,
     * text nodes and sub-elements).
     * The list can optionally be sorted alphabetically (useful for comparing 2 lists when the
     * order of sub-components is not controlled).
     * The list can optionally be sent to System.out for debug purpose.
     * The parent String is inserted as the first line of the returned list (after sorting) and is
     * just used as a title for the list of lines.
     *
     * @param n the XML node to represent as a List of String
     * @param parent title line; also displayed as a prefix when the list is written to System.out
     * @param linesToIgnore pipe separated list of patterns causing the line containing one of them not to be
     *        included in the returned list; patterns are lower-cased before matching, empty patterns are skipped, null means no pattern
     * @param sortList decides if the list has to be sorted before being returned, set it to true when the
     *        order of sub-components of Node n is uncontrolled
     * @param showList decides if the list is sent to System.out before returning
     * @param sortUnique sorts the list and removes duplicate lines; takes precedence over {@code sortList}
     * @param singleAttrByLine generates one extra line per attribute instead of appending attributes to the element line
     * @param ignoreChildrenLinesAsWell when true, the lines following an ignored line are ignored too as long as
     *        their depth is greater than the depth of the ignored line
     * @return the list of lines representing the node branch, preceded by the title line
     */
    public static List<String> getNodeTreeAsList2(Node n, String parent, String linesToIgnore, boolean sortList, boolean showList, boolean sortUnique, boolean singleAttrByLine, boolean ignoreChildrenLinesAsWell) {
        // generate xml as list of lines (empty parent so that lines of two nodes are comparable)
        List<String> allLines = new ArrayList<String>();
        buildNodeTree2(n, "", null, allLines, singleAttrByLine);

        List<String> filteredLines = removeIgnoredLines(allLines, linesToIgnore, ignoreChildrenLinesAsWell);

        // apply sort options
        if (sortUnique) {
            // a TreeSet makes the elements unique and sorts them as well
            filteredLines = new ArrayList<String>(new TreeSet<String>(filteredLines));
        } else if (sortList) {
            Collections.sort(filteredLines);
        }

        // the title goes on top after sorting so that it always sits at index 0
        filteredLines.add(0, parent);

        if (showList) {
            showList(parent, filteredLines);
        }
        return filteredLines;
    }

    /**
     * Returns the lines that do not match any ignore pattern.
     * With ignoreChildren set, lines deeper than an ignored line are dropped as well until a line
     * with a depth lower than or equal to the ignored one is met.
     */
    private static List<String> removeIgnoredLines(List<String> lines, String linesToIgnore, boolean ignoreChildren) {
        Set<String> patterns = linesToIgnore == null
                ? new HashSet<String>()
                : new HashSet<String>(Arrays.asList(linesToIgnore.toLowerCase().split("\\|")));

        List<String> kept = new ArrayList<String>();
        boolean ignoring = false;
        int ignoredLevel = NO_IGNORED_LEVEL;
        for (String line : lines) {
            int level = getLineLevel(line);
            // a line at the same depth or higher up is not a child of the last ignored line: stop ignoring
            if (ignoring && level <= ignoredLevel) {
                ignoring = false;
                ignoredLevel = NO_IGNORED_LEVEL;
            }
            // without child propagation every line is judged on its own
            if (!ignoreChildren) {
                ignoring = false;
            }
            if (!ignoring && containsAnyPattern(line, patterns)) {
                ignoring = true;
                ignoredLevel = level;
            }
            if (!ignoring) {
                kept.add(line);
            }
        }
        return kept;
    }

    /** Tells whether the line contains at least one of the non-empty patterns. */
    private static boolean containsAnyPattern(String line, Set<String> patterns) {
        for (String pattern : patterns) {
            if (pattern.length() > 0 && line.contains(pattern)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds a flat representation of the branch of node n by traversing it recursively.
     * Each node becomes a line in lines (plus one line per attribute when singleAttrByLine is set);
     * text nodes that are empty once whitespace is removed are always skipped.
     * The parent should be an empty string if you want to later compare the lines of two different nodes.
     * Use a non empty parent String to tag sub-components of a node for debug purpose.
     *
     * @param n the node to convert
     * @param parent prefix of the generated line; the recursion passes {@code parent + "-" + name}
     * @param level depth of the parent node as a decimal string; null or empty for the top node
     * @param lines list receiving the generated lines
     * @param singleAttrByLine when true each attribute gets its own line after the element line,
     *        otherwise all attributes are appended to the element line
     */
    protected static void buildNodeTree2(Node n, String parent, String level, List<String> lines, boolean singleAttrByLine) {
        // depth of this node: parent depth + 1, zero-padded to at least two characters
        int depth = (level == null || level.length() == 0) ? 1 : Integer.parseInt(level) + 1;
        String levelTag = Integer.toString(depth);
        while (levelTag.length() < 2) {
            levelTag = "0" + levelTag;
        }

        // NOTE(review): 30 is not one of the node type constants declared by org.w3c.dom.Node,
        // so this guard is not expected to trigger with standard DOM nodes; kept as is.
        if (n.getNodeType() == 30) {
            return;
        }

        String name = n.getNodeName().toLowerCase();
        String value = n.getNodeValue();
        if (value == null) {
            value = "";
        }
        value = value.toLowerCase().replaceAll("\\s", "").trim();
        // empty text nodes are ALWAYS ignored
        if (name.equals("#text") && value.length() == 0) {
            return;
        }

        String line = parent + "-" + name;
        if (value.length() > 0) {
            line = line + "=" + value;
        }

        if (singleAttrByLine) {
            // one line for the element itself, then one more line for each attribute
            lines.add(levelTag + LEVEL_SEPARATOR + line);
            if (n.hasAttributes()) {
                for (int i = 0; i < n.getAttributes().getLength(); i++) {
                    lines.add(levelTag + LEVEL_SEPARATOR + line + attributeSuffix(n.getAttributes().item(i)));
                }
            }
        } else {
            // all the attribute info is added to the element line
            if (n.hasAttributes()) {
                for (int i = 0; i < n.getAttributes().getLength(); i++) {
                    line += attributeSuffix(n.getAttributes().item(i));
                }
            }
            lines.add(levelTag + LEVEL_SEPARATOR + line);
        }

        // recurse to child nodes
        if (n.hasChildNodes()) {
            String childParent = parent + "-" + name;
            for (int i = 0; i < n.getChildNodes().getLength(); i++) {
                buildNodeTree2(n.getChildNodes().item(i), childParent, levelTag, lines, singleAttrByLine);
            }
        }
    }

    /** Formats an attribute node as {@code -attr-<name>=<value>} with lower-cased name and lower-cased, trimmed value. */
    private static String attributeSuffix(Node attribute) {
        String attrName = attribute.getNodeName().toLowerCase();
        String attrValue = attribute.getNodeValue().toLowerCase().trim();
        return "-attr-" + attrName + "=" + attrValue;
    }

    /**
     * Converts both nodes into sorted lists of lines, applies the target/replacement substitutions
     * and returns the differences between the two lists.
     *
     * <p>The lists start with the title lines {@code n1-original} and {@code n2-revised}, so the
     * result contains a delta at position 0 for them unless a substitution makes them equal.
     *
     * @param tag not used by this method
     * @param lines2Ignore pipe separated list of patterns; lines containing one of them are not compared
     * @param targetList pipe separated values to be replaced in each line (null for no replacement)
     * @param replacmentList pipe separated replacement values, matched by position with targetList
     * @param n1 Node to be compared
     * @param n2 Node to be compared with
     * @param sortUnique sorts the lines and removes duplicates
     * @param singleAttrByLine generates one line for each attribute of each element
     * @param ignoreChildrenLinesAsWell also ignores the lines nested under an ignored line
     * @return the deltas computed by the diff of the two lists
     */
    public static List<Delta> compareXmlNodes2AndGetDeltas(String tag, String lines2Ignore, String targetList, String replacmentList, Node n1, Node n2, boolean sortUnique, boolean singleAttrByLine, boolean ignoreChildrenLinesAsWell) {
        List<String> original = getNodeTreeAsList2(n1, ORIGINAL_TITLE, lines2Ignore, true, false, sortUnique, singleAttrByLine, ignoreChildrenLinesAsWell);
        List<String> revised = getNodeTreeAsList2(n2, REVISED_TITLE, lines2Ignore, true, false, sortUnique, singleAttrByLine, ignoreChildrenLinesAsWell);
        filterList(original, targetList, replacmentList);
        filterList(revised, targetList, replacmentList);
        Patch patch = DiffUtils.diff(original, revised);
        return patch.getDeltas();
    }

    /**
     * Compares 2 XML nodes after converting them in a sorted List of String.
     *
     * <p>The delta that only reflects the differing title lines (a one line to one line change at
     * position 0) is skipped. A delta at position 0 spanning more lines also carries a real
     * difference and is reported.
     *
     * @param tag is displayed as a prefix when differences are written to System.out
     * @param lines2Ignore pipe separated list of patterns. When a pattern is found in a String generated during the
     *        Node conversion into a List, the String is not added to the List and thus ignored during Node comparison
     * @param targetList is a String containing pipe separated values to be replaced in each line
     * @param replacmentList pipe separated values to use in replacement of the targetList values
     * @param n1 Node to be compared
     * @param n2 Node to be compared with
     * @param sortUnique sorts the List of String representing the Node and removes duplicates (each String in the List is unique)
     * @param singleAttrByLine generates one String in the List for each attribute of each element
     * @param ignoreChildrenLinesAsWell also ignores the lines nested under an ignored line
     * @return true if no differences are found, else false. The differences are sent to System.out.
     */
    public static boolean compareXmlNodes2(String tag, String lines2Ignore, String targetList, String replacmentList, Node n1, Node n2, boolean sortUnique, boolean singleAttrByLine, boolean ignoreChildrenLinesAsWell) {
        List<Delta> deltas = compareXmlNodes2AndGetDeltas(tag, lines2Ignore, targetList, replacmentList, n1, n2, sortUnique, singleAttrByLine, ignoreChildrenLinesAsWell);
        boolean same = true;
        for (Delta delta : deltas) {
            if (isTitleOnlyDelta(delta)) {
                continue;
            }
            System.out.println(tag + " - delta: " + delta.toString());
            same = false;
        }
        return same;
    }

    /**
     * Tells whether the delta covers nothing but the title lines: a change of exactly one line
     * into one line located at index 0 of both lists.
     */
    private static boolean isTitleOnlyDelta(Delta delta) {
        return delta.getOriginal().getPosition() == 0
                && delta.getRevised().getPosition() == 0
                && delta.getOriginal().getLines().size() == 1
                && delta.getRevised().getLines().size() == 1;
    }

    /**
     * Prints each element of the list to System.out as {@code tag - index - element}.
     *
     * @param tag prefix of every printed line
     * @param list the elements to print
     */
    static void showList(String tag, List<String> list) {
        for (int i = 0; i < list.size(); i++) {
            System.out.println(tag + " - " + i + " - " + list.get(i));
        }
    }

    /**
     * Replaces, in every element of the list, each target with the replacement found at the same
     * position in replacmentList. The list is modified in place; targets are applied in order.
     *
     * <p>String.split() drops trailing empty strings, so an empty replacement can be written
     * {@code (empty)}. A target without a matching replacement (shorter or null replacmentList)
     * is replaced with the empty string. Empty targets are skipped.
     *
     * @param list the lines to update in place
     * @param targetList pipe separated values to search for; nothing is done when null
     * @param replacmentList pipe separated replacement values, matched by position with targetList
     */
    public static void filterList(List<String> list, String targetList, String replacmentList) {
        if (targetList == null) {
            return;
        }
        String[] targets = targetList.split("\\|");
        String[] repls = replacmentList == null ? new String[0] : replacmentList.split("\\|");
        // un-escape the special (empty) value
        for (int i = 0; i < repls.length; i++) {
            if (repls[i].equals("(empty)")) {
                repls[i] = "";
            }
        }
        for (int i = 0; i < list.size(); i++) {
            String current = list.get(i);
            String updated = current;
            for (int j = 0; j < targets.length; j++) {
                // replacing the empty string would insert the replacement between all characters
                if (targets[j].length() == 0) {
                    continue;
                }
                String replacement = j < repls.length ? repls[j] : "";
                updated = updated.replace(targets[j], replacement);
            }
            if (!updated.equals(current)) {
                list.set(i, updated);
            }
        }
    }

    /**
     * Compares 2 XML nodes without any target/replacement substitution.
     *
     * @param tag is displayed as a prefix when differences are written to System.out
     * @param lines2Ignore pipe separated list of patterns; lines containing one of them are not compared
     * @param n1 Node to be compared
     * @param n2 Node to be compared with
     * @param sortUnique sorts the lines and removes duplicates
     * @param singleAttrByLine generates one line for each attribute of each element
     * @param ignoreChildrenLinesAsWell also ignores the lines nested under an ignored line
     * @return true if no differences are found, else false
     */
    public static boolean compareXmlNodes2(String tag, String lines2Ignore, Node n1, Node n2, boolean sortUnique, boolean singleAttrByLine, boolean ignoreChildrenLinesAsWell) {
        return compareXmlNodes2(tag, lines2Ignore, null, null, n1, n2, sortUnique, singleAttrByLine, ignoreChildrenLinesAsWell);
    }

    /**
     * Compares 2 XML nodes without any target/replacement substitution and without ignoring the
     * lines nested under an ignored line.
     *
     * @param tag is displayed as a prefix when differences are written to System.out
     * @param lines2Ignore pipe separated list of patterns; lines containing one of them are not compared
     * @param n1 Node to be compared
     * @param n2 Node to be compared with
     * @param sortUnique sorts the lines and removes duplicates
     * @param singleAttrByLine generates one line for each attribute of each element
     * @return true if no differences are found, else false
     */
    public static boolean compareXmlNodes2(String tag, String lines2Ignore, Node n1, Node n2, boolean sortUnique, boolean singleAttrByLine) {
        return compareXmlNodes2(tag, lines2Ignore, null, null, n1, n2, sortUnique, singleAttrByLine, false);
    }
}