package com.tinkerpop.pipes.filter; import com.tinkerpop.pipes.AbstractPipe; import com.tinkerpop.pipes.PipeFunction; import java.util.LinkedHashSet; import java.util.Set; /** * The DuplicateFilterPipe will not allow a duplicate object to pass through it. * This is accomplished by the Pipe maintaining an internal HashSet that is used to store a history of previously seen objects. * Thus, the more unique objects that pass through this Pipe, the slower it becomes as a log_2 index is checked for every object. * Also, beware of OutOfMemoryExceptions as if the number of distinct objects is too great, then the HashSet will overflow memory. * * @author Marko A. Rodriguez (http://markorodriguez.com) */ public class DuplicateFilterPipe<S> extends AbstractPipe<S, S> implements FilterPipe<S> { private final Set historySet = new LinkedHashSet(); private final PipeFunction<S, ?> function; public DuplicateFilterPipe() { this.function = null; } public DuplicateFilterPipe(final PipeFunction<S, ?> function) { this.function = function; } protected S processNextStart() { while (true) { final S s = this.starts.next(); Object t; if (null != this.function) { t = this.function.compute(s); } else { t = s; } if (this.historySet.add(t)) { return s; } } } public void reset() { this.historySet.clear(); super.reset(); } }