package jcmp;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.util.ListIterator;
import java.util.LinkedList;
import java.util.Iterator;
import java.util.HashSet;
import jiff.JsonDiff;
import jiff.AbstractFieldFilter;
/**
* Compares two JSON documents, and builds a list of all changes.
*
* Containers (objects and arrays) are compared recursively. The comparison
* algorithm works like this:
*
* Objects: A field-by-field comparison is done. If a field exists in the first
* document but not in the second, that field is removed. If a field exists in
* the second document but not the first, that field is added. If a field exists
* in both documents with different values, that field is modified.
*
* Arrays: There are two possible algorithms to compare arrays. If array
* elements contain a unique identifier (which is defined by the caller), then
* array elements of the first and the second document are matched using the
* unique identifiers of array elements. Then each matching array element is
* compared to generate the detailed difference. If array elements don't have
* unique identifiers, then each element of the first array is compared to each
* element of the second array, and the elements with minimal number of changes
* are associated. Elements that are too different from each other are not
* associated.
*
* Differences:
*
* An Addition denotes a new field or array element. Addition.field1 is null,
* meaning the field does not exist in document1, and Addition.field2 denotes
* the new field, or array element.
*
* A Removal denotes a removed field or array element. Removal.field1 denotes
* the element in document1, and Removal.field2 is null.
*
* A Modification denotes a content modification of a field, or array element.
* Both field1 and field2 are non-null, and set to the name of the modified
* field.
*
* A Move denotes an array element move. field1 denotes the old index of the
* array element, and field2 denotes the new index.
*
* If new elements are added to an array, or existing elements are removed, the
* addition and removal appear as diff, and any node that shifted during the
* operation appears within a Move.
*/
public abstract class DocCompare<BaseType, ValueType, ObjectType, ArrayType> {
/**
 * Signals that an array has identity fields configured, but the identity of
 * at least one of its elements could not be retrieved.
 */
public static final class InvalidArrayIdentity extends Exception {
    /**
     * @param p the exception message; the comparison code passes the path of
     * the offending array element
     */
    public InvalidArrayIdentity(String p) {
        super(p);
    }

    /**
     * @param p path segments, rendered into the exception message with
     * JsonDiff.toString
     */
    public InvalidArrayIdentity(List<String> p) {
        super(JsonDiff.toString(p));
    }
}
/**
 * Signals that an array has identity fields configured, but two of its
 * elements produced the same identity.
 */
public static final class DuplicateArrayIdentity extends Exception {
    /**
     * @param p the exception message; the comparison code passes the path of
     * the element whose identity was already seen
     */
    public DuplicateArrayIdentity(String p) {
        super(p);
    }

    /**
     * @param p path segments, rendered into the exception message with
     * JsonDiff.toString
     */
    public DuplicateArrayIdentity(List<String> p) {
        super(JsonDiff.toString(p));
    }
}
/**
* Extracts the identity of an array element, so that elements of two arrays
* can be matched to each other by identity rather than by content.
*
* @param <T> the type of the array elements handed to the extractor
*/
public interface IdentityExtractor<T> {
/**
* Returns the identity object of the given array element.
*
* The returned object is used as a HashMap key while building the
* identity-to-index map of an array, so it must implement equals and
* hashCode consistently. Returning null makes the comparison fail with
* InvalidArrayIdentity; returning equal identities for two elements of
* the same array makes it fail with DuplicateArrayIdentity.
*
* @param element the array element
* @return the identity of the element
*/
Object getIdentity(T element);
}
/**
 * An edit script together with counters of changed and unchanged fields.
 * Only additions, removals and modifications are counted as changed
 * fields; array element moves are part of the edit script but are not
 * counted.
 */
public static class Difference<T> {
    private final List<Delta<T>> delta;
    private int numUnchangedFields;
    private int numChangedFields;

    /**
     * Creates an empty difference: no deltas, zero unchanged fields.
     */
    public Difference() {
        this(0);
    }

    /**
     * Wraps the given list of deltas and counts the changes in it. The
     * list is kept by reference, not copied, so later calls to add()
     * append to the caller's list.
     */
    public Difference(List<Delta<T>> delta) {
        this.delta = delta;
        for (Delta<T> entry : delta) {
            if (countsAsChange(entry)) {
                numChangedFields++;
            }
        }
    }

    /**
     * Creates a difference holding the single given delta and no
     * unchanged fields.
     */
    public Difference(Delta<T> d) {
        this(new ArrayList<Delta<T>>(1));
        add(d);
    }

    /**
     * Creates a difference denoting "no change" for the given number of
     * fields.
     */
    public Difference(int numFields) {
        delta = new ArrayList<>();
        numUnchangedFields = numFields;
    }

    /**
     * @return the number of fields found to be unchanged
     */
    public int getNumUnchangedFields() {
        return numUnchangedFields;
    }

    /**
     * @return the number of additions, removals and modifications; moves
     * are not included
     */
    public int getNumChangedFields() {
        return numChangedFields;
    }

    /**
     * @return the edit script; this is the internal list, not a copy
     */
    public List<Delta<T>> getDelta() {
        return delta;
    }

    /**
     * Merges another difference into this one: its deltas are appended
     * and both of its counters are added to the counters of this one.
     */
    public void add(Difference<T> diff) {
        numChangedFields += diff.numChangedFields;
        numUnchangedFields += diff.numUnchangedFields;
        delta.addAll(diff.delta);
    }

    /**
     * Appends one delta, incrementing the changed-field counter unless
     * the delta is a move.
     */
    public void add(Delta<T> d) {
        delta.add(d);
        if (countsAsChange(d)) {
            numChangedFields++;
        }
    }

    /**
     * The changed fraction of the compared fields, between 0 and 1. 0
     * means nothing changed (or nothing was counted at all), 1 means
     * every counted field changed.
     */
    public double getChangeAmount() {
        int total = numChangedFields + numUnchangedFields;
        if (total == 0) {
            return 0;
        }
        return numChangedFields / (double) total;
    }

    /**
     * @return true if the edit script is empty
     */
    public boolean same() {
        return delta.isEmpty();
    }

    /**
     * Renders the edit script, one delta per line.
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder();
        for (Delta<T> entry : delta) {
            out.append(entry.toString());
            out.append('\n');
        }
        return out.toString();
    }

    // Moves only relocate an element, so they do not count as a change
    private static boolean countsAsChange(Delta<?> d) {
        return d instanceof Addition
                || d instanceof Removal
                || d instanceof Modification;
    }
}
/**
 * Common base of all edit script entries. Holds the name of the affected
 * field in the first document (field1) and in the second document
 * (field2); either may be null depending on the kind of delta.
 */
public static abstract class Delta<T> {
    protected final String field1;
    protected final String field2;

    /**
     * @param field1 field name in the first document, may be null
     * @param field2 field name in the second document, may be null
     */
    public Delta(String field1, String field2) {
        this.field1 = field1;
        this.field2 = field2;
    }

    /**
     * Same as the String constructor, but takes the field names as path
     * segment lists. Each non-null list is rendered to a string right
     * away, so the caller may keep modifying the lists afterwards.
     */
    public Delta(List<String> field1, List<String> field2) {
        this(render(field1), render(field2));
    }

    public String getField1() {
        return field1;
    }

    public String getField2() {
        return field2;
    }

    /**
     * Returns field1 when it is set, otherwise field2.
     */
    public String getField() {
        if (field1 != null) {
            return field1;
        }
        return field2;
    }

    // null-safe rendering of a path segment list
    private static String render(List<String> path) {
        return path == null ? null : JsonDiff.toString(path);
    }
}
/**
 * A field or array element that exists in the second document only.
 * field1 is always null; field2 names the added field.
 */
public static class Addition<T> extends Delta<T> {
    private final T addedNode;

    /**
     * @param field2 name of the added field in the second document
     * @param addedNode the added content
     */
    public Addition(String field2, T addedNode) {
        super(null, field2);
        this.addedNode = addedNode;
    }

    /**
     * @param field2 path segments of the added field in the second document
     * @param addedNode the added content
     */
    public Addition(List<String> field2, T addedNode) {
        super(null, field2);
        this.addedNode = addedNode;
    }

    public T getAddedNode() {
        return addedNode;
    }

    @Override
    public String toString() {
        StringBuilder out = new StringBuilder("+ ");
        out.append(field2).append(':').append(addedNode);
        return out.toString();
    }
}
/**
 * A field or array element that exists in the first document only.
 * field1 names the removed field; field2 is always null.
 */
public static class Removal<T> extends Delta<T> {
    private final T removedNode;

    /**
     * @param field1 name of the removed field in the first document
     * @param removedNode the removed content
     */
    public Removal(String field1, T removedNode) {
        super(field1, null);
        this.removedNode = removedNode;
    }

    /**
     * @param field1 path segments of the removed field in the first document
     * @param removedNode the removed content
     */
    public Removal(List<String> field1, T removedNode) {
        super(field1, null);
        this.removedNode = removedNode;
    }

    public T getRemovedNode() {
        return removedNode;
    }

    @Override
    public String toString() {
        StringBuilder out = new StringBuilder("- ");
        out.append(field1).append(':').append(removedNode);
        return out.toString();
    }
}
/**
 * An array element that changed position. field1 is the element's path in
 * the first document, field2 its path in the second document.
 */
public static class Move<T> extends Delta<T> {
    private final T movedNode;

    /**
     * @param field1 old location of the element
     * @param field2 new location of the element
     * @param movedNode the element that moved
     */
    public Move(String field1, String field2, T movedNode) {
        super(field1, field2);
        this.movedNode = movedNode;
    }

    /**
     * @param field1 path segments of the old location
     * @param field2 path segments of the new location
     * @param movedNode the element that moved
     */
    public Move(List<String> field1, List<String> field2, T movedNode) {
        super(field1, field2);
        this.movedNode = movedNode;
    }

    public T getMovedNode() {
        return movedNode;
    }

    @Override
    public String toString() {
        StringBuilder out = new StringBuilder("* ");
        out.append(field1).append("->").append(field2);
        out.append(':').append(movedNode);
        return out.toString();
    }
}
/**
 * A field whose content differs between the two documents. node1 is the
 * content in the first document, node2 the content in the second.
 */
public static class Modification<T> extends Delta<T> {
    private final T node1;
    private final T node2;

    /**
     * @param field1 field name in the first document
     * @param node1 content in the first document
     * @param field2 field name in the second document
     * @param node2 content in the second document
     */
    public Modification(String field1, T node1, String field2, T node2) {
        super(field1, field2);
        this.node1 = node1;
        this.node2 = node2;
    }

    /**
     * Same as the String constructor, with the field names given as path
     * segment lists.
     */
    public Modification(List<String> field1, T node1, List<String> field2, T node2) {
        super(field1, field2);
        this.node1 = node1;
        this.node2 = node2;
    }

    /**
     * @return the content as found in the first document
     */
    public T getUnmodifiedNode() {
        return node1;
    }

    /**
     * @return the content as found in the second document
     */
    public T getModifiedNode() {
        return node2;
    }

    @Override
    public String toString() {
        StringBuilder out = new StringBuilder("* ");
        out.append(field1).append("->").append(field2);
        out.append(':').append(node1).append(" -> ").append(node2);
        return out.toString();
    }
}
/**
 * Holds the names of the fields of an array element that together
 * uniquely identify that element within its array.
 */
public static class ArrayIdentityFields {
    private final String[] fields;

    /**
     * @param fields the identifying field names; the array is stored as
     * given, without copying
     */
    public ArrayIdentityFields(String... fields) {
        this.fields = fields;
    }

    /**
     * @return the identifying field names, as passed to the constructor
     */
    public String[] getFields() {
        return this.fields;
    }
}
/**
 * Default array identity object. Wraps the identity values of one array
 * element and derives equals/hashCode from them, so that it can be used
 * as a hash map key.
 *
 * Two identities are equal when they hold the same number of values and
 * the values are pairwise equal; two null values at the same position are
 * considered equal.
 */
public static class DefaultIdentity<T> {
    private final T[] nodes;
    // Lazily computed hash; assumes the values are not modified after
    // the first hashCode() call
    private Integer hcode;

    /**
     * @param nodes the identity values; the array is stored as given,
     * without copying
     */
    public DefaultIdentity(T[] nodes) {
        this.nodes = nodes;
    }

    /**
     * Sum of the hash codes of the non-null identity values, computed
     * once and cached.
     */
    @Override
    public int hashCode() {
        if (hcode == null) {
            int code = 0;
            if (nodes != null) {
                for (T node : nodes) {
                    if (node != null) {
                        code += node.hashCode();
                    }
                }
            }
            hcode = code;
        }
        return hcode;
    }

    /**
     * Element-wise, null-safe and length-aware comparison of the identity
     * values. Unlike a prefix comparison guarded by a catch-all, this is
     * reflexive and symmetric as required by the Object.equals contract.
     */
    @Override
    public boolean equals(Object x) {
        if (this == x) {
            return true;
        }
        if (!(x instanceof DefaultIdentity)) {
            return false;
        }
        DefaultIdentity<?> other = (DefaultIdentity<?>) x;
        // Fully qualified so that no new import is required
        return java.util.Arrays.equals(other.nodes, nodes);
    }
}
// Identity fields registered through addArrayIdentity, keyed by the array
// field name. Looked up in getArrayIdentityExtractor with the rendered,
// index-normalized path of the array being compared.
private final Map<String, ArrayIdentityFields> arrayIdentities = new HashMap<>();
// ---- Hooks to be supplied by the concrete document model ----
// compareNodes uses the four predicates below, in this order, to pick a
// comparison strategy: value, array, object, and finally null.
protected abstract boolean isValue(BaseType value);
protected abstract boolean isArray(BaseType value);
protected abstract boolean isObject(BaseType value);
protected abstract boolean isNull(BaseType value);
// Conversions; each is only called by compareNodes after the matching
// predicate returned true for the node.
protected abstract ValueType asValue(BaseType value);
protected abstract ArrayType asArray(BaseType value);
protected abstract ObjectType asObject(BaseType value);
// Value equality; compareNodes reports a Modification when this returns false.
protected abstract boolean equals(ValueType v1, ValueType v2);
// Iterates the (field name, field value) entries of an object node.
protected abstract Iterator<Map.Entry<String, BaseType>> getFields(ObjectType o);
// Field presence test and field lookup by name, used by compareObjects.
protected abstract boolean hasField(ObjectType value, String field);
protected abstract BaseType getField(ObjectType value, String field);
// Builds the extractor for a set of registered identity fields. Only called
// when identity fields are registered for the array being compared.
protected abstract IdentityExtractor getArrayIdentityExtractorImpl(ArrayIdentityFields fields);
// Array access: element at an index, and number of elements. The array
// comparisons call getElement with indexes in the range 0..size-1.
protected abstract BaseType getElement(ArrayType value, int index);
protected abstract int size(ArrayType value);
/**
 * Registers the fields that uniquely identify the elements of an object
 * array. Registering the same array again replaces the earlier entry.
 *
 * In the following document:
 * <pre>
 * {
 *   ...
 *   "aField": [
 *     { "_id":1,"field":...},
 *     { "_id":2,"field":...}
 *   ]
 * }
 * </pre>
 * the call looks like
 * <pre>
 * jsonCompare.addArrayIdentity("aField", "_id");
 * </pre>
 * If more than one field is needed to uniquely identify an element, list
 * all of them in the argument list.
 *
 * During comparison the array path is looked up with every numeric index
 * segment replaced by "*" and rendered by JsonDiff.toString, so for an
 * array nested inside another array the name given here presumably has
 * to use "*" in place of the outer index (verify against
 * JsonDiff.toString).
 *
 * @param array The name of the array field
 * @param identities The fields of the array element that can identify an
 * element
 */
public void addArrayIdentity(String array, String... identities) {
    ArrayIdentityFields identityFields = new ArrayIdentityFields(identities);
    arrayIdentities.put(array, identityFields);
}
/**
 * Compares two documents, starting from empty field paths on both sides.
 *
 * @param node1 the first (old) document
 * @param node2 the second (new) document
 * @return the differences between the two documents
 * @throws InvalidArrayIdentity if an array with configured identity fields
 * has an element whose identity cannot be extracted
 * @throws DuplicateArrayIdentity if an array with configured identity
 * fields has two elements with the same identity
 */
public Difference<BaseType> compareNodes(BaseType node1, BaseType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    List<String> path1 = new ArrayList<>();
    List<String> path2 = new ArrayList<>();
    return compareNodes(path1, node1, path2, node2);
}
/**
 * Compares two nodes located at the given paths.
 *
 * Two values are compared with equals(ValueType, ValueType); two arrays
 * and two objects are compared recursively. Any other combination is a
 * Modification, unless both nodes are null nodes.
 *
 * @param field1 path of node1 in the first document
 * @param node1 node of the first document
 * @param field2 path of node2 in the second document
 * @param node2 node of the second document
 * @return a difference with a single unchanged field when the nodes are
 * equal values or both null; a single Modification when they differ as
 * values or in kind; otherwise the result of the recursive comparison
 */
public Difference<BaseType> compareNodes(List<String> field1,
                                         BaseType node1,
                                         List<String> field2,
                                         BaseType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    if (isValue(node1) && isValue(node2)) {
        if (equals(asValue(node1), asValue(node2))) {
            return new Difference<BaseType>(1);
        }
        return new Difference<BaseType>(new Modification<BaseType>(field1, node1, field2, node2));
    }
    if (isArray(node1) && isArray(node2)) {
        return compareArrays(field1, asArray(node1), field2, asArray(node2));
    }
    if (isObject(node1) && isObject(node2)) {
        return compareObjects(field1, asObject(node1), field2, asObject(node2));
    }
    if (isNull(node1) && isNull(node2)) {
        // Both sides are null nodes: counts as one unchanged field
        return new Difference<BaseType>(1);
    }
    // Nodes of different kinds
    return new Difference<BaseType>(new Modification<BaseType>(field1, node1, field2, node2));
}
/**
 * Compares two object nodes field by field, recursing into fields that
 * exist on both sides.
 *
 * The path lists are used as scratch stacks: field names are pushed while
 * a field is being processed and popped afterwards, so on normal return
 * both lists have their original content.
 *
 * @param field1 path of node1 in the first document
 * @param node1 object node of the first document
 * @param field2 path of node2 in the second document
 * @param node2 object node of the second document
 * @return removals for fields only in node1, additions for fields only in
 * node2, and the recursive differences of the fields present in both
 */
public Difference<BaseType> compareObjects(List<String> field1,
                                           ObjectType node1,
                                           List<String> field2,
                                           ObjectType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    final Difference<BaseType> result = new Difference<>();

    // Pass 1: every field of the first object is either shared or removed
    Iterator<Map.Entry<String, BaseType>> firstFields = getFields(node1);
    while (firstFields.hasNext()) {
        Map.Entry<String, BaseType> entry = firstFields.next();
        String name = entry.getKey();
        field1.add(name);
        BaseType value1 = entry.getValue();
        if (!hasField(node2, name)) {
            // Present in the first object only
            result.add(new Removal<BaseType>(field1, value1));
        } else {
            // Present on both sides: descend
            field2.add(name);
            BaseType value2 = getField(node2, name);
            result.add(compareNodes(field1, value1, field2, value2));
            pop(field2);
        }
        pop(field1);
    }

    // Pass 2: fields of the second object unknown to the first are additions
    Iterator<Map.Entry<String, BaseType>> secondFields = getFields(node2);
    while (secondFields.hasNext()) {
        Map.Entry<String, BaseType> entry = secondFields.next();
        String name = entry.getKey();
        if (hasField(node1, name)) {
            continue;
        }
        field2.add(name);
        result.add(new Addition<BaseType>(field2, entry.getValue()));
        pop(field2);
    }
    return result;
}
/**
 * Looks up the identity extractor registered for an array, given the
 * array's field name as a string. The name is split into path segments
 * with AbstractFieldFilter.parse and resolved by the list-based overload.
 *
 * @param arrayField the array field name
 * @return the extractor, or null if no identity is registered
 */
public IdentityExtractor getArrayIdentityExtractor(String arrayField) {
    List<String> segments = AbstractFieldFilter.parse(arrayField);
    return getArrayIdentityExtractor(segments);
}
/**
 * Looks up the identity extractor registered for the array at the given
 * path. Segments that are "*" or parse as an integer are replaced by "*"
 * before the lookup, so one registration covers the array at any index
 * of an enclosing array.
 *
 * @param arrayField path segments of the array field
 * @return the extractor built by getArrayIdentityExtractorImpl, or null
 * if no identity fields are registered for the normalized path
 */
public IdentityExtractor getArrayIdentityExtractor(List<String> arrayField) {
    List<String> normalized = new ArrayList<>();
    for (String segment : arrayField) {
        boolean wildcard = "*".equals(segment) || isIndex(segment);
        normalized.add(wildcard ? "*" : segment);
    }
    ArrayIdentityFields registered = arrayIdentities.get(JsonDiff.toString(normalized));
    if (registered == null) {
        return null;
    }
    return getArrayIdentityExtractorImpl(registered);
}
/**
 * Tells whether a path segment is an array index, i.e. whether it parses
 * as a decimal int. Null and non-numeric strings yield false.
 */
private boolean isIndex(String s) {
    boolean numeric;
    try {
        Integer.parseInt(s);
        numeric = true;
    } catch (NumberFormatException notANumber) {
        numeric = false;
    }
    return numeric;
}
/**
 * Compares two arrays. If identity fields are registered for the array
 * (looked up using the path in the first document), elements are matched
 * by identity; otherwise they are matched by content similarity.
 *
 * @param field1 path of node1 in the first document
 * @param node1 array node of the first document
 * @param field2 path of node2 in the second document
 * @param node2 array node of the second document
 * @return the differences between the two arrays
 */
public Difference<BaseType> compareArrays(List<String> field1,
                                          ArrayType node1,
                                          List<String> field2,
                                          ArrayType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    IdentityExtractor extractor = getArrayIdentityExtractor(field1);
    return extractor != null
            ? compareArraysWithId(field1, node1, field2, node2, extractor)
            : compareArraysNoId(field1, node1, field2, node2);
}
/**
 * Computes the difference between two arrays whose elements can be
 * identified by a unique identifier.
 *
 * Elements are matched by identity. An element of the first array without
 * a counterpart is a Removal, an element of the second array without a
 * counterpart is an Addition. Matched elements at different indexes yield
 * a Move, and the contents of every matched pair are compared recursively.
 *
 * The path lists are used as scratch stacks and are restored on normal
 * return.
 *
 * @param field1 path of node1 in the first document
 * @param node1 array node of the first document
 * @param field2 path of node2 in the second document
 * @param node2 array node of the second document
 * @param idex extracts the identity of an array element
 * @return the differences between the two arrays
 * @throws InvalidArrayIdentity if the identity of an element is null
 * @throws DuplicateArrayIdentity if two elements of the same array have
 * equal identities
 */
public Difference<BaseType> compareArraysWithId(List<String> field1,
                                                ArrayType node1,
                                                List<String> field2,
                                                ArrayType node2,
                                                IdentityExtractor idex)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    Difference<BaseType> ret = new Difference<>();
    // Build a map of identity -> index for both arrays
    final Map<Object, Integer> identities1 = getIdentityMap(field1, node1, idex);
    final Map<Object, Integer> identities2 = getIdentityMap(field2, node2, idex);
    // Iterate all elements of array 1
    for (Map.Entry<Object, Integer> entry1 : identities1.entrySet()) {
        // Unbox once: the indexes must be compared by value, not by reference
        final int index1 = entry1.getValue();
        final BaseType element1 = getElement(node1, index1);
        // Append index to the field name
        field1.add(Integer.toString(index1));
        final Integer boxedIndex2 = identities2.get(entry1.getKey());
        if (boxedIndex2 == null) {
            // array2 has no element with the same identity: removal
            ret.add(new Removal<BaseType>(field1, element1));
        } else {
            final int index2 = boxedIndex2;
            field2.add(Integer.toString(index2));
            // array2 has the same element; at a different index it is a move.
            // Comparing the boxed Integers with != would compare references
            // and report spurious moves for indexes outside the Integer cache.
            if (index2 != index1) {
                ret.add(new Move<BaseType>(field1, field2, element1));
            }
            // Recursively compare contents to get detailed diff
            ret.add(compareNodes(field1, element1, field2, getElement(node2, index2)));
            pop(field2);
        }
        pop(field1);
    }
    // Now check elements of array 2 that are not in array 1
    for (Map.Entry<Object, Integer> entry2 : identities2.entrySet()) {
        if (!identities1.containsKey(entry2.getKey())) {
            // entry2 is not in array 1: addition
            final int index2 = entry2.getValue();
            field2.add(Integer.toString(index2));
            ret.add(new Addition<BaseType>(field2, getElement(node2, index2)));
            pop(field2);
        }
    }
    return ret;
}
/**
 * An (index1, index2) pair usable as a hash key; remembers which element
 * pairs have been compared already.
 */
private static class Pair {
    private final int i1;
    private final int i2;

    public Pair(int i1, int i2) {
        this.i1 = i1;
        this.i2 = i2;
    }

    @Override
    public int hashCode() {
        return 1001 * i1 + i2;
    }

    @Override
    public boolean equals(Object o) {
        // instanceof is false for null and for foreign types
        if (!(o instanceof Pair)) {
            return false;
        }
        Pair other = (Pair) o;
        return other.i1 == i1 && other.i2 == i2;
    }
}
/**
 * Computes the difference between two arrays whose elements have no
 * identity, by comparing elements recursively and pairing each element
 * with its closest match.
 *
 * The algorithm runs in phases:
 * <ol>
 * <li>Elements that are exactly equal are associated. Distances of the
 * non-equal pairs compared on the way are recorded.</li>
 * <li>Every remaining element of the first array is paired with its
 * closest recorded match, unless more than half of its fields differ, in
 * which case it is reported as a Removal.</li>
 * <li>Elements of the second array that are still unassociated are
 * reported as Additions.</li>
 * <li>Associated pairs at different indexes are reported as Moves.</li>
 * </ol>
 *
 * The path lists are used as scratch stacks and are restored on normal
 * return.
 *
 * @param field1 path of node1 in the first document
 * @param node1 array node of the first document
 * @param field2 path of node2 in the second document
 * @param node2 array node of the second document
 * @return the differences between the two arrays
 */
public Difference<BaseType> compareArraysNoId(List<String> field1,
                                              ArrayType node1,
                                              List<String> field2,
                                              ArrayType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    Difference<BaseType> ret = new Difference<>();
    IndexAssoc assoc = new IndexAssoc(size(node1), size(node2));
    HashSet<Pair> comparedPairs = new HashSet<>();
    // First associate exact matches.
    // We loop through the unassociated elements of node1 and node2.
    // If the nodes are equal, we associate them; if they are not, we note
    // the distance between the two so we don't need to re-compare later.
    for (assoc.start1(); assoc.hasNext1();) {
        int index1 = assoc.next1();
        BaseType element1 = getElement(node1, index1);
        field1.add(Integer.toString(index1));
        for (assoc.start2(); assoc.hasNext2();) {
            int index2 = assoc.next2();
            BaseType element2 = getElement(node2, index2);
            field2.add(Integer.toString(index2));
            comparedPairs.add(new Pair(index1, index2));
            Difference<BaseType> diff = compareNodes(field1, element1, field2, element2);
            pop(field2);
            if (diff.same()) {
                assoc.associate(index1, index2);
                break;
            }
            assoc.recordDistance(index1, index2, diff);
        }
        pop(field1);
    }
    // Here, all exactly matching nodes are associated.
    // Each remaining node1 element is matched to its closest node2 element.
    for (assoc.start1(); assoc.hasNext1();) {
        int index1 = assoc.next1();
        BaseType element1 = getElement(node1, index1);
        field1.add(Integer.toString(index1));
        for (assoc.start2(); assoc.hasNext2();) {
            int index2 = assoc.next2();
            // Only compare pairs that have no distance recorded yet;
            // add() returns true exactly when the pair was not seen before.
            if (comparedPairs.add(new Pair(index1, index2))) {
                BaseType element2 = getElement(node2, index2);
                field2.add(Integer.toString(index2));
                Difference<BaseType> diff = compareNodes(field1, element1, field2, element2);
                assoc.recordDistance(index1, index2, diff);
                pop(field2);
            }
        }
        // NOTE(review): the recorded minimum is not invalidated when its
        // index2 gets associated with another element, so it may point to
        // an element of node2 that is already taken - confirm whether
        // that is acceptable.
        @SuppressWarnings("unchecked")
        IxDiff<BaseType> ixdiff = assoc.getMin(index1);
        // If an object has changed more than 0.5 (more than half
        // of its fields are changed), then it is not a match
        if (ixdiff == null || ixdiff.change > 0.5) {
            // No matching node for node1
            ret.add(new Removal<BaseType>(field1, element1));
            assoc.remove1(index1);
        } else {
            // Matching node
            assoc.associate(index1, ixdiff.index2);
            ret.add(ixdiff.diff);
        }
        pop(field1);
    }
    // Anything remaining on node2 are nodes that are added
    for (assoc.start2(); assoc.hasNext2();) {
        int index = assoc.next2();
        BaseType element2 = getElement(node2, index);
        field2.add(Integer.toString(index));
        ret.add(new Addition<BaseType>(field2, element2));
        pop(field2);
    }
    // Look at associations for moved nodes
    for (Map.Entry<Integer, Integer> entry : assoc.assoc.entrySet()) {
        // Unbox before comparing: != on the boxed Integers compares
        // references and reports spurious moves for large indexes
        int from = entry.getKey();
        int to = entry.getValue();
        if (from != to) {
            field1.add(Integer.toString(from));
            field2.add(Integer.toString(to));
            BaseType node = getElement(node1, from);
            ret.add(new Move<BaseType>(field1, field2, node));
            pop(field2);
            pop(field1);
        }
    }
    return ret;
}
/**
 * The measured distance between one element of the first array (index1)
 * and one element of the second array (index2): the detailed difference
 * and its change amount.
 */
private static class IxDiff<T> {
    private final Difference<T> diff;
    private final double change;
    private final int index1;
    private final int index2;

    public IxDiff(Difference<T> diff, double change, int index1, int index2) {
        this.index1 = index1;
        this.index2 = index2;
        this.change = change;
        this.diff = diff;
    }
}
/**
 * Bookkeeping for matching the elements of two arrays: the indexes of
 * both arrays that are still unassociated, cursors to iterate them while
 * they shrink, the associations made so far, and the closest match
 * recorded for each index of the first array.
 */
private static class IndexAssoc {
    // Unassociated indexes of the first and the second array
    private final List<Integer> ix1 = new ArrayList<>();
    private final List<Integer> ix2 = new ArrayList<>();
    // Cursor positions within ix1 and ix2; -1 means "before the first"
    private int itr1;
    private int itr2;
    // index in the first array -> associated index in the second array
    private final Map<Integer, Integer> assoc = new HashMap<>();
    /**
     * The IxDiff with the smallest change recorded so far for each index
     * of the first array, keyed on that index
     */
    private final Map<Integer, IxDiff> minimums1 = new HashMap<>();

    /**
     * Starts with all indexes of both arrays unassociated.
     *
     * @param size1 number of elements of the first array
     * @param size2 number of elements of the second array
     */
    public IndexAssoc(int size1, int size2) {
        fill(ix1, size1);
        fill(ix2, size2);
    }

    // Appends 0..count-1 to the list
    private static void fill(List<Integer> target, int count) {
        for (int i = 0; i < count; i++) {
            target.add(i);
        }
    }

    // Removes the given index value from the list; returns the position
    // it occupied, or -1 if it was not present
    private static int removeValue(List<Integer> indexes, int value) {
        int position = indexes.indexOf(value);
        if (position >= 0) {
            indexes.remove(position);
        }
        return position;
    }

    /**
     * Resets the cursor over the unassociated indexes of the first array
     */
    public void start1() {
        itr1 = -1;
    }

    /**
     * @return true if the first array has unassociated indexes beyond
     * the cursor
     */
    public boolean hasNext1() {
        return itr1 + 1 < ix1.size();
    }

    /**
     * Advances the cursor of the first array and returns the
     * unassociated index it now points at
     */
    public int next1() {
        return ix1.get(++itr1);
    }

    /**
     * Takes an index of the first array out of the unassociated set.
     * If the removed entry sat at or before the cursor, the cursor is
     * stepped back so that iteration does not skip an entry.
     */
    public void remove1(int index) {
        int position = removeValue(ix1, index);
        if (position >= 0 && itr1 >= position) {
            itr1--;
        }
    }

    /**
     * Resets the cursor over the unassociated indexes of the second array
     */
    public void start2() {
        itr2 = -1;
    }

    /**
     * @return true if the second array has unassociated indexes beyond
     * the cursor
     */
    public boolean hasNext2() {
        return itr2 + 1 < ix2.size();
    }

    /**
     * Advances the cursor of the second array and returns the
     * unassociated index it now points at
     */
    public int next2() {
        return ix2.get(++itr2);
    }

    /**
     * Takes an index of the second array out of the unassociated set,
     * stepping the cursor back when needed (see remove1).
     */
    public void remove2(int index) {
        int position = removeValue(ix2, index);
        if (position >= 0 && itr2 >= position) {
            itr2--;
        }
    }

    /**
     * Associates an element of the first array with an element of the
     * second array; both indexes leave the unassociated sets.
     */
    public void associate(int index1, int index2) {
        remove1(index1);
        remove2(index2);
        assoc.put(index1, index2);
    }

    /**
     * Records the difference between the elements at index1 and index2.
     * Only the entry with the strictly smallest change amount is kept
     * per index1; on a tie the earlier entry stays.
     */
    public void recordDistance(int index1, int index2, Difference diff) {
        double change = diff.getChangeAmount();
        IxDiff best = minimums1.get(index1);
        if (best == null || best.change > change) {
            minimums1.put(index1, new IxDiff(diff, change, index1, index2));
        }
    }

    /**
     * @return the closest match recorded for index1, or null if no
     * distance was recorded for it
     */
    public IxDiff getMin(int index1) {
        return minimums1.get(index1);
    }
}
/**
 * Builds the identity -> index map of an array.
 *
 * @param field path of the array, used in exception messages
 * @param array the array whose elements are to be indexed
 * @param idex extracts the identity of an element
 * @return a map from each element's identity to its index in the array
 * @throws InvalidArrayIdentity if the extractor returns null for an element
 * @throws DuplicateArrayIdentity if an identity occurs more than once
 */
private Map<Object, Integer> getIdentityMap(List<String> field, ArrayType array, IdentityExtractor idex)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    final int count = size(array);
    final Map<Object, Integer> indexById = new HashMap<>(count);
    int i = 0;
    while (i < count) {
        Object id = idex.getIdentity(getElement(array, i));
        if (id == null) {
            throw new InvalidArrayIdentity(JsonDiff.toString(field) + "." + i);
        }
        // put() hands back the index stored earlier under the same identity
        Integer previous = indexById.put(id, i);
        if (previous != null) {
            throw new DuplicateArrayIdentity(JsonDiff.toString(field) + "." + i);
        }
        i++;
    }
    return indexById;
}
/**
 * Removes the last segment of a path; the list must not be empty.
 */
private static void pop(List<String> l) {
    final int last = l.size() - 1;
    l.remove(last);
}
}