/* Copyright (c) 2013-2014 Boundless and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/org/documents/edl-v10.html
*
* Contributors:
* Johnathan Garrett (LMN Solutions) - initial implementation
*/
package org.locationtech.geogig.repository;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.locationtech.geogig.api.Bucket;
import org.locationtech.geogig.api.Node;
import org.locationtech.geogig.api.ObjectId;
import org.locationtech.geogig.api.RevCommit;
import org.locationtech.geogig.api.RevObject;
import org.locationtech.geogig.api.RevTag;
import org.locationtech.geogig.api.RevTree;
import org.locationtech.geogig.storage.Deduplicator;
import org.locationtech.geogig.storage.ObjectDatabase;
import com.google.common.collect.AbstractIterator;
/**
* The PostOrderIterator class provides utilities for traversing a GeoGig revision history graph in
* postorder or depth-first order. In the context of a GeoGig revision this means that if objects A
* and B are both in the subgraph being traversed and A references B, then B will be visited before
* A.
*
* PostOrderIterator should not be constructed directly, but rather instantiated via static factory
* methods provided for specific configurations. Performance characteristics vary according to the
* traversal policy. In general, an iterator must track all the object ids that have been enqueued,
* but not yet visited, so it will require memory at least proportional to the depth (in a traversal
* of all objects in a repository, this means all objects along the path from the newest commit
* being traversed to the oldest commit being traversed and through the tree structure at that
* commit to the features.) Some traversals use additional memory to avoid re-visiting objects that
* are reachable via multiple paths (eg, features that are part of multiple commits.)
*/
public class PostOrderIterator extends AbstractIterator<RevObject> {
/**
* A traversal of all objects reachable from the given origin, with deduplication.
*/
public static Iterator<RevObject> all(ObjectId top, ObjectDatabase database, Deduplicator deduplicator) {
List<ObjectId> start = new ArrayList<ObjectId>();
start.add(top);
return new PostOrderIterator(start, database, unique(ALL_SUCCESSORS));
}
/**
* A traversal of all objects reachable from the given start list but not reachable from the
* base list, with deduplication. If the traverseCommits flag is set, then the ancestry of the
* commits will be traversed as well as the content, otherwise only the content.
*/
public static Iterator<RevObject> range(List<ObjectId> start, List<ObjectId> base,
ObjectDatabase database, boolean traverseCommits, Deduplicator deduplicator) {
return new PostOrderIterator(new ArrayList<ObjectId>(start), database, //
uniqueWithDeduplicator(blacklist((traverseCommits ? ALL_SUCCESSORS : COMMIT_SUCCESSORS), base), deduplicator));
}
/**
* A traversal of commit history (no content) with deduplication.
* @param start
* @param base
* @param database
* @return
*/
public static Iterator<RevObject> rangeOfCommits(List<ObjectId> start, List<ObjectId> base, ObjectDatabase database, Deduplicator deduplicator) {
return new PostOrderIterator(new ArrayList<ObjectId>(start), database, uniqueWithDeduplicator(blacklist( COMMIT_PARENTS, base), deduplicator));
}
public static Iterator<RevObject> contentsOf(List<ObjectId> needsPrevisit,
ObjectDatabase database, Deduplicator deduplicator) {
return new PostOrderIterator(new ArrayList<ObjectId>(needsPrevisit), database, uniqueWithDeduplicator(COMMIT_SUCCESSORS, deduplicator));
}
/**
* A handle to the object database used for the traversal
*/
private final ObjectDatabase database;
/**
* The collection of ObjectIds that must be visited. It is organized as a list of lists - the
* first entry is always the deepest set of ObjectIds that needs to be processed.
*/
private List<List<ObjectId>> toVisit;
/**
* A flag tracking the state of the traversal. When true, we are building up a queue of objects
* to visit. When false, we are visiting them (aka returning them from the iterator.)
*/
private boolean enqueue;
/**
* The Successors object determining which objects reachable from the current one to enqueue.
*/
private final Successors successors;
/**
* The single, private constructor for PostOrderIterator. Generally it will be more convenient
* to use static factory methods which properly prepare a Successors capturing the traversal
* policy for the iteration.
*
* @param start the initial list of objects to expand out from (these will be the last ones
* actually visited, but the subgraph visited is reachable from this list)
* @param database the objectdatabase used for retrieving objects
* @param successors the traversal policy for this iteration.
*/
private PostOrderIterator(List<ObjectId> start, ObjectDatabase database, Successors successors) {
super();
this.database = database;
this.enqueue = true;
this.successors = successors;
toVisit = new ArrayList<List<ObjectId>>();
toVisit.add(new ArrayList<ObjectId>());
toVisit.get(0).addAll(start);
}
@Override
protected RevObject computeNext() {
while (!toVisit.isEmpty()) {
List<ObjectId> currentList = toVisit.get(0);
if (currentList.isEmpty()) {
// No more ids at this depth - pop a level off of the stack and switch to "visiting"
// mode
enqueue = false;
toVisit.remove(0);
} else {
if (enqueue) {
// We're building up a list of objects to visit, so add all the reachable
// objects from here to the front of the toVisit stack
final ObjectId id = currentList.get(0);
final RevObject object = database.get(id);
final List<ObjectId> next = new ArrayList<ObjectId>();
successors.findSuccessors(object, next);
toVisit.add(0, next);
} else {
// We just visited a node, so switch back to enqueuing mode in order to make
// sure the successors of the next one at this depth are visited.
enqueue = true;
final ObjectId id = currentList.remove(0);
if (successors.previsit(id)) {
return database.get(id);
}
}
}
}
// when the toVisit list becomes empty, we are done
return endOfData();
}
/**
* The Successors interface defines a pluggable strategy for finding successors of (nodes
* reachable from) a GeoGig history object. We follow a combinatorial approach in defining
* Successors - a few immutable basic Successors definitions are provided, and some tools for
* combining them to produce more complex strategies.
*/
private static interface Successors {
/**
* Calculate the list of ObjectIds for objects directly reachable from the given RevObject
* according to this policy.
*
* @param object an object whose successor list should be calculated
* @param successors a List into which successors will be inserted
*/
public void findSuccessors(RevObject object, List<ObjectId> successors);
/**
* Test an ObjectId before the object is visited. Implementors should return true if this
* object should be returned to the client code, false if it should be omitted from results.
*
* @param id the ObjectId of an object that is about to be returned by the iterator
* @return true iff it should be exposed, and false otherwise.
*/
public boolean previsit(ObjectId id);
}
/**
* A Successors strategy for traversing to the parents of commit nodes.
*/
private final static Successors COMMIT_PARENTS = new Successors() {
public void findSuccessors(final RevObject object, final List<ObjectId> successors) {
if (object instanceof RevCommit) {
final RevCommit commit = (RevCommit) object;
successors.addAll(commit.getParentIds());
}
}
@Override
public boolean previsit(ObjectId id) {
return true;
}
};
private final static Successors TAG_COMMIT = new Successors() {
public void findSuccessors(final RevObject object, final List<ObjectId> successors) {
if (object instanceof RevTag) {
final RevTag tag = (RevTag) object;
successors.add(tag.getCommitId());
}
}
@Override
public boolean previsit(ObjectId id) {
return true;
}
};
/**
* A Successors strategy for traversing to the single content tree from a commit node.
*/
private final static Successors COMMIT_TREE = new Successors() {
public void findSuccessors(final RevObject object, final List<ObjectId> successors) {
if (object instanceof RevCommit) {
final RevCommit commit = (RevCommit) object;
successors.add(commit.getTreeId());
}
}
@Override
public boolean previsit(ObjectId id) {
return true;
}
};
/**
* A Successors strategy for traversing to features from a tree node
*/
private final static Successors TREE_FEATURES = new Successors() {
public void findSuccessors(final RevObject object, final List<ObjectId> successors) {
if (object instanceof RevTree) {
final RevTree tree = (RevTree) object;
if (tree.features().isPresent()) {
final Set<ObjectId> seen = new HashSet<ObjectId>();
for (Node n : tree.features().get()) {
if (n.getMetadataId().isPresent()) {
if (seen.add(n.getMetadataId().get())) {
successors.add(n.getMetadataId().get());
}
}
if (seen.add(n.getObjectId())) {
successors.add(n.getObjectId());
}
}
}
}
}
@Override
public boolean previsit(ObjectId id) {
return true;
}
};
/**
* A Successors strategy for traversing to subtrees from a tree node
*/
private final static Successors TREE_SUBTREES = new Successors() {
public void findSuccessors(final RevObject object, final List<ObjectId> successors) {
if (object instanceof RevTree) {
final RevTree tree = (RevTree) object;
if (tree.trees().isPresent()) {
final Set<ObjectId> seen = new HashSet<ObjectId>();
for (Node n : tree.trees().get()) {
if (n.getMetadataId().isPresent()) {
if (seen.add(n.getMetadataId().get())) {
successors.add(n.getMetadataId().get());
}
}
if (seen.add(n.getObjectId())) {
successors.add(n.getObjectId());
}
}
}
}
}
@Override
public boolean previsit(ObjectId id) {
return true;
}
};
/**
* A Successors strategy for traversing to bucket contents from a tree node.
*/
private final static Successors TREE_BUCKETS = new Successors() {
public void findSuccessors(final RevObject object, final List<ObjectId> successors) {
if (object instanceof RevTree) {
final RevTree tree = (RevTree) object;
if (tree.buckets().isPresent()) {
for (Map.Entry<?, Bucket> entry : tree.buckets().get().entrySet()) {
final Bucket bucket = entry.getValue();
successors.add(bucket.id());
}
}
}
}
@Override
public boolean previsit(ObjectId id) {
return true;
}
};
/**
* A factory method for combining zero or more Successors strategies by producing a strategy
* visiting all nodes that would be visited by any of the strategies.
*
* @param chained zero or more Successors strategies
* @return a Successors strategy that visits a node if any constituent strategy would visit that
* node.
*/
private final static Successors combine(final Successors... chained) {
return new Successors() {
public void findSuccessors(final RevObject object, final List<ObjectId> successors) {
for (Successors s : chained) {
s.findSuccessors(object, successors);
}
}
public boolean previsit(ObjectId id) {
for (Successors s : chained) {
if (!s.previsit(id))
return false;
}
return true;
}
};
}
/**
* A factory method for decorating a Successors strategy with uniqueness checking. The
* uniqueness check is implemented by caching the ids of all visited objects - this is exact but
* produces unbounded memory usage.
*
* @param delegate the original Successors strategy
* @return a modified Successors strategy that visits all the same nodes but filters out any
* repetitions.
*/
private final static Successors unique(final Successors delegate) {
return uniqueWithDeduplicator(delegate, new org.locationtech.geogig.storage.memory.HeapDeduplicator());
}
private final static Successors uniqueWithDeduplicator(final Successors delegate, final Deduplicator deduplicator) {
return new Successors() {
public void findSuccessors(final RevObject object, final List<ObjectId> successors) {
if (!deduplicator.isDuplicate(object.getId())) {
final int oldSize = successors.size();
delegate.findSuccessors(object, successors);
deduplicator.removeDuplicates(successors.subList(oldSize, successors.size()));
}
}
public boolean previsit(ObjectId id) {
return (!deduplicator.visit(id)) && delegate.previsit(id);
}
};
}
/**
* A factory method for decorating a Successors strategy with a blacklist. Not only will objects
* in the blacklist be skipped, but also no objects reachable from them will be visited, unless
* they are reachable by another path.
*
* @param delegate the original Successors policy
* @param base a list of blacklisted objectids
* @return a Successors policy for visiting the same nodes as the original policy, but with
*/
private final static Successors blacklist(final Successors delegate, final List<ObjectId> base) {
final Set<ObjectId> baseSet = new HashSet<ObjectId>(base);
return new Successors() {
public void findSuccessors(final RevObject object, final List<ObjectId> successors) {
if (!baseSet.contains(object.getId())) {
final int oldSize = successors.size();
delegate.findSuccessors(object, successors);
successors.subList(oldSize, successors.size()).removeAll(baseSet);
}
}
public boolean previsit(ObjectId id) {
boolean dprevisit = delegate.previsit(id);
return dprevisit && !baseSet.contains(id);
}
};
}
/**
* A traversal policy for visiting all reachable nodes without deduplication
*/
private final static Successors ALL_SUCCESSORS = combine( //
TAG_COMMIT, //
COMMIT_PARENTS, //
COMMIT_TREE, //
TREE_BUCKETS, //
TREE_SUBTREES, //
TREE_FEATURES);
/**
* A traversal policy for visiting all reachable commits without deduplication
*/
private final static Successors COMMIT_SUCCESSORS = combine( //
COMMIT_TREE, //
TREE_BUCKETS, //
TREE_SUBTREES, //
TREE_FEATURES);
}