package com.bigdata.bop.ap.filter; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import com.bigdata.bop.BOp; import com.bigdata.bop.HashMapAnnotations; import com.bigdata.rdf.spo.SPO; import com.bigdata.striterator.IChunkConverter; import com.bigdata.striterator.MergeFilter; import cutthecrap.utils.striterators.Filter; import cutthecrap.utils.striterators.Filterator; import cutthecrap.utils.striterators.IPropertySet; /** * A DISTINCT operator based for elements in a relation. The operator is based * on an in-memory hash table. * <p> * Note: This is used for the in-memory {@link SPO} distinct filter, but * it is more general and can be applied to any data type that can be * inserted into a set. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z * thompsonbry $ * * @todo Extract a common interface or metadata for all DISTINCT element filters * (in memory hash map, persistence capable hash map, distributed hash * map). * * @todo Reconcile with {@link IChunkConverter}, * {@link com.bigdata.striterator.DistinctFilter} (handles solutions) and * {@link MergeFilter} (handles comparables), * {@link com.bigdata.rdf.spo.DistinctSPOIterator}, etc. */ public class DistinctFilter extends BOpFilterBase { /** * */ private static final long serialVersionUID = 1L; public interface Annotations extends BOpFilter.Annotations, HashMapAnnotations { } /** * A instance using the default configuration for the in memory hash map. */ public static DistinctFilter newInstance() { return new DistinctFilter(BOp.NOARGS, BOp.NOANNS); } /** * Constructor required for {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}. */ public DistinctFilter(final DistinctFilter op) { super(op); } /** * Required shallow copy constructor. */ public DistinctFilter(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); } // /** // * @see Annotations#INITIAL_CAPACITY // */ // public int getInitialCapacity() { // // return getProperty(Annotations.INITIAL_CAPACITY, // Annotations.DEFAULT_INITIAL_CAPACITY); // // } // // /** // * @see Annotations#LOAD_FACTOR // */ // public float getLoadFactor() { // // return getProperty(Annotations.LOAD_FACTOR, // Annotations.DEFAULT_LOAD_FACTOR); // // } @Override final protected Iterator filterOnce(Iterator src, final Object context) { return new Filterator(src, context, new DistinctFilterImpl(this)); } /** * DISTINCT filter based on Java heap data structures. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ public static class DistinctFilterImpl extends Filter { private static final long serialVersionUID = 1L; /** * Note: Iterators are single threaded so we do not need to use a * {@link ConcurrentHashMap} here. */ @SuppressWarnings("rawtypes") private final HashSet members; @SuppressWarnings("unchecked") static private <T> T getProperty(final IPropertySet pset, final String name, final T defaultValue) { final Object val = pset.getProperty(name); if (val != null) return (T) val; return defaultValue; } /** * DISTINCT filter based on Java heap data structures. * * @param propertySet * Used to configured the DISTINCT filter. * * @see DistinctFilter.Annotations */ @SuppressWarnings("rawtypes") public DistinctFilterImpl(final IPropertySet propertySet) { final int initialCapacity = getProperty(propertySet, Annotations.INITIAL_CAPACITY, Annotations.DEFAULT_INITIAL_CAPACITY); final float loadFactor = getProperty(propertySet, Annotations.LOAD_FACTOR, Annotations.DEFAULT_LOAD_FACTOR); members = new HashSet(initialCapacity, loadFactor); } @SuppressWarnings("unchecked") @Override public boolean isValid(final Object obj) { return members.add(obj); } } }