/* Copyright (c) 2014 Boundless and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/org/documents/edl-v10.html
*
* Contributors:
* Gabriel Roldan (Boundless) - initial implementation
*/
package org.locationtech.geogig.api.plumbing.diff;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Iterables.transform;
import static com.google.common.collect.Maps.uniqueIndex;
import static com.google.common.collect.Sets.newTreeSet;
import static com.google.common.collect.Sets.union;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeSet;
import javax.annotation.Nullable;
import javax.annotation.ParametersAreNonnullByDefault;
import org.locationtech.geogig.api.Bounded;
import org.locationtech.geogig.api.Bucket;
import org.locationtech.geogig.api.Node;
import org.locationtech.geogig.api.NodeRef;
import org.locationtech.geogig.api.ObjectId;
import org.locationtech.geogig.api.RevObject;
import org.locationtech.geogig.api.RevObject.TYPE;
import org.locationtech.geogig.api.RevTree;
import org.locationtech.geogig.repository.SpatialOps;
import org.locationtech.geogig.storage.NodeStorageOrder;
import org.locationtech.geogig.storage.ObjectDatabase;
import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Iterators;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.PeekingIterator;
import com.google.common.collect.Sets;
import com.vividsolutions.jts.geom.Envelope;
/**
* Provides a means to "walk" the differences between two {@link RevTree trees} in in-order order
* and emit diff events to a {@link Consumer}, which can choose to skip parts of the walk when it
* had collected enough information for its purpose and don't need to go further down a given pair
* of trees (either named or bucket).
*/
@ParametersAreNonnullByDefault
public class PreOrderDiffWalk {
private static final NodeStorageOrder ORDER = new NodeStorageOrder();
private final RevTree left;
private final RevTree right;
private final ObjectDatabase leftSource;
private final ObjectDatabase rightSource;
public PreOrderDiffWalk(RevTree left, RevTree right, ObjectDatabase leftSource,
ObjectDatabase rightSource) {
checkNotNull(left, "left");
checkNotNull(right, "right");
checkNotNull(leftSource, "leftSource");
checkNotNull(rightSource, "rightSource");
this.left = left;
this.right = right;
this.leftSource = leftSource;
this.rightSource = rightSource;
}
/**
* Walk up the differences between the two trees and emit events to the {@code consumer}.
* <p>
* If the two root trees are not equal, an initial call to {@link Consumer#tree(Node, Node)}
* will be made where the nodes will have {@link NodeRef#ROOT the root name} (i.e. empty
* string), and provided the consumer indicates to continue with the traversal, further calls to
* {@link Consumer#feature}, {@link Consumer#tree}, and/or {@link Consumer#bucket} will be made
* as changes between the two trees are found.
* <p>
* At any time, if {@link Consumer#tree} or {@link Consumer#bucket} returns {@code false}, that
* pair of trees won't be further evaluated and the traversal continues with their siblings or
* parents if there are no more siblings.
* <p>
* Note the {@code consumer} is only notified of nodes or buckets that differ, using
* {@code null} of either the left of right argument to indicate there's no matching object at
* the left or right side of the comparison. Left side nulls indicate a new object, right side
* nulls a deleted one. None of the {@code Consumer} method is ever called with equal left and
* right arguments.
*
* @param consumer the callback object that gets notified of changes between the two trees and
* can abort the walk for whole subtrees.
*/
public final void walk(Consumer consumer) {
if (left.equals(right)) {
return;
}
// start by asking the consumer if go on with the walk at all with the
// root nodes
Envelope lbounds = SpatialOps.boundsOf(left);
Node lnode = Node.create(NodeRef.ROOT, left.getId(), ObjectId.NULL, TYPE.TREE, lbounds);
Envelope rbounds = SpatialOps.boundsOf(right);
Node rnode = Node.create(NodeRef.ROOT, right.getId(), ObjectId.NULL, TYPE.TREE, rbounds);
if (consumer.tree(lnode, rnode)) {
traverseTree(consumer, left, right, 0);
}
consumer.endTree(lnode, rnode);
}
/**
* When this method is called its guaranteed that either {@link Consumer#tree} returned
* {@code true} (i.e. its a pair of trees pointed out by a Node), or {@link Consumer#bucket}
* returned {@code true} (i.e. they are trees pointed out by buckets).
*
* @param consumer the callback object
* @param left the tree at the left side of the comparisson
* @param right the tree at the right side of the comparisson
* @param bucketDepth the depth of bucket traversal (only non zero if comparing two bucket
* trees, as when called from {@link #handleBucketBucket})
* @precondition {@code left != null && right != null}
*/
private void traverseTree(Consumer consumer, RevTree left, RevTree right, int bucketDepth) {
checkArgument(left != null && right != null);
if (Objects.equal(left, right)) {
return;
}
// Possible cases:
// 1- left and right are leaf trees
// 2- left and right are bucket trees
// 3- left is leaf and right is bucketed
// 4- left is bucketed and right is leaf
final boolean leftIsLeaf = !left.buckets().isPresent();
final boolean rightIsLeaf = !right.buckets().isPresent();
Iterator<Node> leftc = leftIsLeaf ? left.children() : null;
Iterator<Node> rightc = rightIsLeaf ? right.children() : null;
if (leftIsLeaf && rightIsLeaf) {// 1-
traverseLeafLeaf(consumer, leftc, rightc);
} else if (!(leftIsLeaf || rightIsLeaf)) {// 2-
traverseBucketBucket(consumer, left, right, bucketDepth);
} else if (leftIsLeaf) {// 3-
traverseLeafBucket(consumer, leftc, right, bucketDepth);
} else {// 4-
traverseBucketLeaf(consumer, left, rightc, bucketDepth);
}
}
/**
* Traverse and compare the {@link RevTree#children() children} nodes of two leaf trees, calling
* {@link #node(Consumer, Node, Node)} for each diff.
*/
private void traverseLeafLeaf(Consumer consumer, Iterator<Node> leftc, Iterator<Node> rightc) {
PeekingIterator<Node> li = Iterators.peekingIterator(leftc);
PeekingIterator<Node> ri = Iterators.peekingIterator(rightc);
while (li.hasNext() && ri.hasNext()) {
Node lpeek = li.peek();
Node rpeek = ri.peek();
int order = ORDER.compare(lpeek, rpeek);
if (order < 0) {
node(consumer, li.next(), null);// removal
} else if (order == 0) {// change
// same feature at both sides of the traversal, consume them and check if its
// changed it or not
Node l = li.next();
Node r = ri.next();
if (!l.equals(r)) {
node(consumer, l, r);
}
} else {
node(consumer, null, ri.next());// addition
}
}
checkState(!li.hasNext() || !ri.hasNext(),
"either the left or the right iterator should have been fully consumed");
// right fully consumed, any remaining node in left is a removal
while (li.hasNext()) {
node(consumer, li.next(), null);
}
// left fully consumed, any remaining node in right is an add
while (ri.hasNext()) {
node(consumer, null, ri.next());
}
}
/**
* Called when found a difference between two nodes. It can be a removal ({@code right} is
* null), an added node ({@code left} is null}, or a modified feature/tree (neither is null);
* but {@code left} and {@code right} can never be equal.
* <p>
* Depending on the type of node, this method will call {@link Consumer#tree} or
* {@link Consumer#feature}, and continue the traversal down the trees in case it was a tree and
* {@link Consumer#tree} returned null.
*/
private void node(Consumer consumer, @Nullable final Node left, @Nullable final Node right) {
checkState(left != null || right != null, "both nodes can't be null");
checkArgument(!Objects.equal(left, right));
final TYPE type = left == null ? right.getType() : left.getType();
if (TYPE.FEATURE.equals(type)) {
consumer.feature(left, right);
} else {
checkState(TYPE.TREE.equals(type));
if (consumer.tree(left, right)) {
RevTree leftTree;
RevTree rightTree;
leftTree = left == null ? RevTree.EMPTY : leftSource.getTree(left.getObjectId());
rightTree = right == null ? RevTree.EMPTY : rightSource
.getTree(right.getObjectId());
traverseTree(consumer, leftTree, rightTree, 0);
}
consumer.endTree(left, right);
}
}
/**
* Compares a bucket tree (i.e. its size is greater than {@link RevTree#NORMALIZED_SIZE_LIMIT}
* and hence has been split into buckets) at the left side of the comparison, and a the
* {@link RevTree#children() children} nodes of a leaf tree at the right side of the comparison.
* <p>
* This happens when the left tree is much larger than the right tree
* <p>
* This traversal is symmetric to {@link #traverseLeafBucket} so be careful that any change made
* to this method shall have a matching change at {@link #traverseLeafBucket}
*
* @precondition {@code left.buckets().isPresent()}
*/
private void traverseBucketLeaf(final Consumer consumer, final RevTree left,
final Iterator<Node> right, final int bucketDepth) {
checkState(left.buckets().isPresent());
final SortedMap<Integer, Bucket> leftBuckets = left.buckets().get();
final ListMultimap<Integer, Node> nodesByBucket = splitNodesToBucketsAtDepth(right,
bucketDepth);
final SortedSet<Integer> bucketIndexes = Sets.newTreeSet(Sets.union(leftBuckets.keySet(),
nodesByBucket.keySet()));
// get all buckets at once, to leverage ObjectDatabase optimizations
final Map<ObjectId, RevObject> bucketTrees;
bucketTrees = uniqueIndex(leftSource.getAll(transform(leftBuckets.values(), BUCKET_ID)),
OBJECT_ID);
for (Integer bucketIndex : bucketIndexes) {
Bucket leftBucket = leftBuckets.get(bucketIndex);
List<Node> rightNodes = nodesByBucket.get(bucketIndex);// never returns null, but empty
if (null == leftBucket) {
traverseLeafLeaf(consumer, Iterators.<Node> emptyIterator(), rightNodes.iterator());
} else if (rightNodes.isEmpty()) {
if (consumer.bucket(bucketIndex, bucketDepth, leftBucket, null)) {
RevTree leftTree = (RevTree) bucketTrees.get(leftBucket.id());
// traverseBucketBucket(consumer, leftTree, RevTree.EMPTY, bucketDepth);
traverseTree(consumer, leftTree, RevTree.EMPTY, bucketDepth + 1);
}
consumer.endBucket(bucketIndex, bucketDepth, leftBucket, null);
} else {
RevTree leftTree = (RevTree) bucketTrees.get(leftBucket.id());
if (leftTree.buckets().isPresent()) {
traverseBucketLeaf(consumer, leftTree, rightNodes.iterator(), bucketDepth + 1);
} else {
traverseLeafLeaf(consumer, leftTree.children(), rightNodes.iterator());
}
}
}
}
private static final Function<Bucket, ObjectId> BUCKET_ID = new Function<Bucket, ObjectId>() {
@Override
public ObjectId apply(Bucket b) {
return b.id();
}
};
private static final Function<RevObject, ObjectId> OBJECT_ID = new Function<RevObject, ObjectId>() {
@Override
public ObjectId apply(RevObject o) {
return o.getId();
}
};
/**
* Compares a bucket tree (i.e. its size is greater than {@link RevTree#NORMALIZED_SIZE_LIMIT}
* and hence has been split into buckets) at the right side of the comparison, and a the
* {@link RevTree#children() children} nodes of a leaf tree at the left side of the comparison.
* <p>
* This happens when the right tree is much larger than the left tree
* <p>
* This traversal is symmetric to {@link #traverseBucketLeaf} so be careful that any change made
* to this method shall have a matching change at {@link #traverseBucketLeaf}
*
* @precondition {@code right.buckets().isPresent()}
*/
private void traverseLeafBucket(final Consumer consumer, final Iterator<Node> left,
final RevTree right, final int bucketDepth) {
checkState(right.buckets().isPresent());
final SortedMap<Integer, Bucket> rightBuckets = right.buckets().get();
final ListMultimap<Integer, Node> nodesByBucket = splitNodesToBucketsAtDepth(left,
bucketDepth);
final SortedSet<Integer> bucketIndexes = Sets.newTreeSet(Sets.union(rightBuckets.keySet(),
nodesByBucket.keySet()));
// get all buckets at once, to leverage ObjectDatabase optimizations
final Map<ObjectId, RevObject> bucketTrees;
bucketTrees = uniqueIndex(rightSource.getAll(transform(rightBuckets.values(), BUCKET_ID)),
OBJECT_ID);
for (Integer bucketIndex : bucketIndexes) {
Bucket rightBucket = rightBuckets.get(bucketIndex);
List<Node> leftNodes = nodesByBucket.get(bucketIndex);// never returns null, but empty
if (null == rightBucket) {
traverseLeafLeaf(consumer, leftNodes.iterator(), Iterators.<Node> emptyIterator());
} else if (leftNodes.isEmpty()) {
if (consumer.bucket(bucketIndex, bucketDepth, null, rightBucket)) {
RevTree rightTree = (RevTree) bucketTrees.get(rightBucket.id());
// traverseBucketBucket(consumer, RevTree.EMPTY, rightTree, bucketDepth);
traverseTree(consumer, RevTree.EMPTY, rightTree, bucketDepth + 1);
}
consumer.endBucket(bucketIndex, bucketDepth, null, rightBucket);
} else {
RevTree rightTree = (RevTree) bucketTrees.get(rightBucket.id());
if (rightTree.buckets().isPresent()) {
traverseLeafBucket(consumer, leftNodes.iterator(), rightTree, bucketDepth + 1);
} else {
traverseLeafLeaf(consumer, leftNodes.iterator(), rightTree.children());
}
}
}
}
/**
* Split the given nodes into lists keyed by the bucket indes they would belong if they were
* part of a tree bucket at the given {@code bucketDepth}
*/
private ListMultimap<Integer, Node> splitNodesToBucketsAtDepth(Iterator<Node> nodes,
final int bucketDepth) {
Function<Node, Integer> keyFunction = new Function<Node, Integer>() {
@Override
public Integer apply(Node node) {
return ORDER.bucket(node, bucketDepth);
}
};
ListMultimap<Integer, Node> nodesByBucket = Multimaps.index(nodes, keyFunction);
return nodesByBucket;
}
/**
* Traverse two bucket trees and notify their differences to the {@code consumer}.
* <p>
* If this method is called than its guaranteed that the two bucket trees are note equal (one of
* them may be empty though), and that {@link Consumer#bucket} returned {@code true}
* <p>
* For each bucket index present in the joint set of the two trees buckets,
* {@link #traverseTree(Consumer, RevTree, RevTree, int)} will be called for the bucket trees
* that are not equal with {@code bucketDepth} incremented by one.
*
* @param consumer the callback object to receive diff events from the comparison of the two
* trees
* @param left the bucket tree at the left side of the comparison
* @param right the bucket tree at the right side of the comparison
* @param bucketDepth the current depth at which the comparison is evaluating these two bucket
* trees
* @see #traverseTree(Consumer, RevTree, RevTree, int)
* @precondition {@code !left.equals(right)}
* @precondition {@code left.isEmpty() || left.buckets().isPresent()}
* @precondition {@code right.isEmpty() || right.buckets().isPresent()}
*/
private void traverseBucketBucket(Consumer consumer, final RevTree left, final RevTree right,
final int bucketDepth) {
checkState(left.isEmpty() || left.buckets().isPresent());
checkState(right.isEmpty() || right.buckets().isPresent());
ImmutableSortedMap<Integer, Bucket> lb = left.buckets().get();
ImmutableSortedMap<Integer, Bucket> rb = right.buckets().get();
TreeSet<Integer> availableIndexes = newTreeSet(union(lb.keySet(), rb.keySet()));
@Nullable
Bucket lbucket;
@Nullable
Bucket rbucket;
for (Integer index : availableIndexes) {
lbucket = lb.get(index);
rbucket = rb.get(index);
if (Objects.equal(lbucket, rbucket)) {
continue;
}
if (consumer.bucket(index.intValue(), bucketDepth, lbucket, rbucket)) {
RevTree ltree = lbucket == null ? RevTree.EMPTY : leftSource.getTree(lbucket.id());
RevTree rtree = rbucket == null ? RevTree.EMPTY : rightSource.getTree(rbucket.id());
traverseTree(consumer, ltree, rtree, bucketDepth + 1);
}
consumer.endBucket(index.intValue(), bucketDepth, lbucket, rbucket);
}
}
/**
* Defines an interface to consume the events emitted by a diff-tree "depth first" traversal,
* with the ability to be notified of changes to feature and tree nodes, as well as to buckets,
* and to skip the further traversal of whole trees (either as pointed out by "name tree" nodes,
* or internal tree buckets).
* <p>
* This is especially useful when there's no need to traverse the whole diff to compute the
* desired result, as it can be the case of counting changes between two trees where one side of
* the comparison does not have such tree, or the spatial bounds of the difference between two
* trees on the same case.
* <p>
* The first call will always be to {@link #tree(Node, Node)} with the root tree nodes, or there
* may be no call to any method at all if the two tree nodes are equal.
* <p>
* This also allows to parallelize some computations where there's no need to have the output of
* the tree comparison in "prescribed storage order" as defined by {@link NodeStorageOrder}.
*/
public static interface Consumer {
/**
* Called when either two leaf trees are being compared and a feature node have changed (i.e
* neither {@code left} nor {@code right} is null, or a feature has been deleted (
* {@code left} is null), or added ({@code right} is null).
*
* @param left the feature node at the left side of the traversal; may be {@code null) in
* which case {@code right} has been added.
* @param right the feature node at the right side of the traversal; may be {@code null} in
* which case {@code left} has been removed.
* @precondition {@code left != null || right != null}
* @precondition {@code if(left != null && right != null) then left.name() == right.name()}
*/
public abstract void feature(@Nullable final Node left, @Nullable final Node right);
/**
* Called when the traversal finds a tree node at both sides of the traversal with the same
* name and pointing to different trees (i.e. a changed tree), or just one node tree at
* either side of the traversal with no corresponding tree node at the other side (i.e.
* either an added tree - {@code left} is null -, or a deleted tree - {@code right} is null
* -).
* <p>
* If this method returns {@code true} then the traversal will continue down the tree(s)
* contents, calling {@link #bucket}, {@link #feature}, or {@link #tree} as appropriate. If
* this method returns {@code false} the traversal of the tree(s) contents will be skipped
* and continue with the siblings or parents' siblings if there are no more nodes to
* evaluate at the current depth.
*
* @param left the left tree of the traversal
* @param right the right tree of the traversal
* @return {@code true} if the traversal of the contents of these trees should come right
* after this method returns, {@code false} if this consumer does not want to
* continue traversing the trees pointed out by these nodes
* @precondition {@code left != null || right != null}
*/
public abstract boolean tree(@Nullable final Node left, @Nullable final Node right);
/**
* Called once done with a {@link #tree}, regardless of the returned value
*/
public abstract void endTree(@Nullable final Node left, @Nullable final Node right);
/**
* Called when the traversal finds either a bucket at both sides of the traversal with same
* depth an index that have changed, or just one at either side of the comparison with no
* node at the other side that would fall into that bucket if it existed.
* <p>
* When comparing the contents of two trees, it could be that both are bucket trees and then
* this method will be called for each bucket index/depth, resulting in calls to this method
* with wither both buckets or one depending on the existence of buckets at the given index
* at both sides.
* <p>
* Or it can also be that only one of the trees is be a bucket tree and the other a leaf
* tree, in which case this method can be called only if the leaf tree has no node that
* would fall on the same bucket index at the current bucket depth; otherwise
* {@link #feature}, {@link #tree}, or this same method will be called recursively while
* evaluating the leaf tree nodes that would fall on this bucket index and depth, as
* compared with the nodes of the tree pointed out by the bucket that exists at the other
* side of the traversal, or any of its children bucket trees at a more deep bucket, until
* there's no ambiguity.
* <p>
* If this method returns {@code true}, then the traversal will continue down to the
* contents of the trees pointed out but the bucket(s), otherwise the bucket(s) contents
* will be skipped and the traversal continues with the next bucket index, or the parents
* trees siblings.
*
* @param bucketIndex the index of the bucket inside the bucket trees being evaluated, its
* the same for both buckets in case buckets for the same index are present in both
* trees
* @param bucketDepth the depth of the bucket(s)
* @param left the bucket at the given index on the left-tree of the traversal, or
* {@code null} if no bucket exists on the left tree for that index
* @param right the bucket at the given index on the right-tree of the traversal, or
* {@code null} if no bucket exists on the left tree for that index
* @return {@code true} if a call to {@link #tree(Node, Node)} should come right after this
* method is called, {@code false} if this consumer does not want to continue the
* traversal deeper for the trees pointed out by these buckets.
* @precondition {@code left != null || right != null}
*/
public abstract boolean bucket(final int bucketIndex, final int bucketDepth,
@Nullable final Bucket left, @Nullable final Bucket right);
/**
* Called once done with a {@link #bucket}, regardless of the returned value
*/
public abstract void endBucket(final int bucketIndex, final int bucketDepth,
@Nullable final Bucket left, @Nullable final Bucket right);
}
/**
* Template class for consumer decorators, forwards all event calls to the provided consumer;
* concrete subclasses shall override the event methods of their interest.
*/
public static abstract class ForwardingConsumer implements Consumer {
private Consumer delegate;
public ForwardingConsumer(final Consumer delegate) {
this.delegate = delegate;
}
@Override
public void feature(Node left, Node right) {
delegate.feature(left, right);
}
@Override
public boolean tree(Node left, Node right) {
return delegate.tree(left, right);
}
@Override
public void endTree(Node left, Node right) {
delegate.endTree(left, right);
}
@Override
public boolean bucket(int bucketIndex, int bucketDepth, Bucket left, Bucket right) {
return delegate.bucket(bucketIndex, bucketDepth, left, right);
}
@Override
public void endBucket(int bucketIndex, int bucketDepth, Bucket left, Bucket right) {
delegate.endBucket(bucketIndex, bucketDepth, left, right);
}
}
public static class FilteringConsumer extends ForwardingConsumer {
private final Predicate<Bounded> predicate;
public FilteringConsumer(final Consumer delegate, final Predicate<Bounded> predicate) {
super(delegate);
this.predicate = predicate;
}
@Override
public void feature(Node left, Node right) {
if (predicate.apply(left) || predicate.apply(right)) {
super.feature(left, right);
}
}
@Override
public boolean tree(Node left, Node right) {
if (predicate.apply(left) || predicate.apply(right)) {
return super.tree(left, right);
}
return false;
}
@Override
public void endTree(Node left, Node right) {
if (predicate.apply(left) || predicate.apply(right)) {
super.endTree(left, right);
}
}
@Override
public boolean bucket(int bucketIndex, int bucketDepth, Bucket left, Bucket right) {
if (predicate.apply(left) || predicate.apply(right)) {
return super.bucket(bucketIndex, bucketDepth, left, right);
}
return false;
}
@Override
public void endBucket(int bucketIndex, int bucketDepth, Bucket left, Bucket right) {
if (predicate.apply(left) || predicate.apply(right)) {
super.endBucket(bucketIndex, bucketDepth, left, right);
}
}
}
}