TupleFilter.java example

Explorer
blazegraph-master
- database-master
package com.bigdata.btree.filter;

import java.util.Iterator;
import java.util.NoSuchElementException;

import org.apache.log4j.Logger;

import com.bigdata.btree.AbstractBTree;
import com.bigdata.btree.AbstractTuple;
import com.bigdata.btree.BTree;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleCursor;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.ITupleSerializer;
import com.bigdata.btree.Tuple;
import com.bigdata.util.BytesUtil;

import cutthecrap.utils.striterators.Filter;
import cutthecrap.utils.striterators.FilterBase;

/**
 * <p>
 * Filter supporting {@link ITupleIterator}s.
 * </p>
 * <p>
 * <strong>Warning: Unlike {@link Filter}, this class correctly uses a second
 * {@link Tuple} instance to perform filtering.</strong> This is necessary since
 * the {@link Tuple} instance for the base {@link ITupleIterator}
 * implementations for the {@link AbstractBTree} is reused by next() on each
 * call and the {@link TupleFilter} uses one-step lookahead. Failure to use a
 * second {@link Tuple} instance will result in <em>overwrite</em> of the
 * current {@link Tuple} with data from the lookahead {@link Tuple}.
 * </p>
 * <p>
 * Note: You must specify {@link IRangeQuery#KEYS} and/or
 * {@link IRangeQuery#VALS} in order to filter on the keys and/or values
 * associated with the visited tuples.
 * </p>
 * <p>
 * Note: You must specify {@link IRangeQuery#CURSOR} to enable
 * {@link Iterator#remove()} for a <em>local</em> {@link BTree}.
 * </p>
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 * @param <E>
 *            The type of the elements visited by the iterator (tuples of some
 *            sort).
 */
abstract public class TupleFilter<E> extends FilterBase implements ITupleFilter<E> {

    /**
     * Serialization version identifier.
     */
    private static final long serialVersionUID = 1L;

    private static final Logger log = Logger.getLogger(TupleFilter.class);

    /**
     * Default constructor.
     */
    public TupleFilter() {

    }

    /**
     * Wraps the source iterator in a {@link TupleFilterator} which visits only
     * those tuples accepted by {@link #isValid(ITuple)}.
     * 
     * @param src
     *            The source iterator. It is cast to an {@link ITupleIterator},
     *            so a {@link ClassCastException} is thrown for any other kind
     *            of iterator.
     * @param context
     *            The context object. It is passed through to the
     *            {@link TupleFilterator} unchanged.
     * 
     * @return The filtering iterator.
     */
    @SuppressWarnings("unchecked")
    @Override
    public ITupleIterator<E> filterOnce(final Iterator src,Object context) {

        return new TupleFilterator<E>((ITupleIterator<E>) src, context, this);

    }

    /**
     * Decides whether a tuple read from the source iterator will be visited.
     * <p>
     * Note: The tuple handed to this method is the instance returned by the
     * source iterator, not the private copy which is handed to the caller of
     * {@link TupleFilterator#next()}.
     * 
     * @param tuple
     *            The candidate tuple.
     * 
     * @return <code>true</code> iff the tuple should be visited. Tuples for
     *         which this method returns <code>false</code> are skipped.
     */
    abstract protected boolean isValid(ITuple<E> tuple);

    /**
     * Implementation class knows how to avoid side-effects from the reuse of
     * the same {@link Tuple} instance by the base {@link ITupleIterator} impls.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     * @param <E>
     *            The type of the elements visited by the iterator.
     */
    static public class TupleFilterator<E> implements ITupleIterator<E> {

        /**
         * The source iterator.
         */
        protected final ITupleIterator<E> src;

        /**
         * The context.
         */
        protected final Object context;

        /**
         * The filter whose {@link TupleFilter#isValid(ITuple)} method decides
         * which tuples are visited.
         */
        protected final TupleFilter<E> filter;

        /**
         * The next value to be returned by {@link #next()} -or-
         * <code>null</code> if the source iterator is exhausted.
         */
        private ITuple<E> nextValue = null;

        /**
         * The {@link ITuple} instance that will actually be returned to the
         * caller. The data from {@link #nextValue} is <em>copied</em> into this
         * {@link #returnValue} in order to avoid side-effects from
         * {@link #getNext()}. Those side-effects would otherwise arise because
         * the base {@link ITupleIterator} implementations reuse the same
         * {@link Tuple} instance for each tuple visited by the iterator. A copy
         * of the data must be made in order to avoid side-effects from the
         * one-step lookahead used by the filter.
         * <p>
         * This is <code>null</code> iff the filtered source had no tuples to
         * visit when the iterator was created.
         */
        final private AbstractTuple<E> returnValue;

        /**
         * <code>true</code> once {@link #next()} has copied a tuple into
         * {@link #returnValue}. Until then there is no current tuple and
         * {@link #remove()} has nothing to remove.
         */
        private boolean hasCurrent = false;

        /**
         * Creates the filtering iterator and primes the one-step lookahead.
         * <p>
         * Note: {@link #getNext()} is invoked from this constructor, so the
         * source iterator is advanced (and the filter is consulted) before the
         * constructor returns.
         * 
         * @param src
         *            The source iterator.
         * @param context
         *            The context.
         * @param filter
         *            The filter used to accept or reject the tuples visited by
         *            the source iterator.
         */
        public TupleFilterator(final ITupleIterator<E> src, final Object context,
                final TupleFilter<E> filter) {

            this.src = src;
            this.context = context;
            this.filter = filter;

            /*
             * One step lookahead.
             * 
             * Note: Historically this comment stated that the code casts to a
             * Tuple because it presumes that we are running directly against
             * an AbstractBTree rather than filtering tuples buffered in a
             * ResultSet on the client. No such cast is made in this
             * constructor.
             * 
             * NOTE(review): confirm whether AbstractTuple#copyTuple(...) still
             * imposes that restriction.
             */
            this.nextValue = getNext();

            if (this.nextValue != null) {

                /*
                 * NOTE(review): the source index and the tuple serializer are
                 * captured once, from the first tuple which will be visited,
                 * and are then reported for every tuple returned by next().
                 * Confirm that this is correct when the source index can
                 * differ from tuple to tuple.
                 */
                final int sourceIndex = nextValue.getSourceIndex();

                final ITupleSerializer tupleSer = nextValue.getTupleSerializer();

                // private buffer used to avoid side-effects from getNext()
                this.returnValue = new AbstractTuple<E>(nextValue.flags()) {

                    public int getSourceIndex() {

                        return sourceIndex;

                    }

                    public ITupleSerializer getTupleSerializer() {

                        return tupleSer;

                    }

                };

            } else {

                // nothing to be returned.
                this.returnValue = null;

            }

        }

        /**
         * @return <code>true</code> iff the lookahead found another tuple which
         *         was accepted by the filter.
         */
        public boolean hasNext() {

            return nextValue != null;

        }

        /**
         * Returns the next tuple accepted by the filter.
         * <p>
         * Note: The same private {@link ITuple} instance is returned by every
         * call. Its data are overwritten by the next call to this method.
         * 
         * @return The private tuple instance holding a copy of the data for
         *         the visited tuple.
         * 
         * @throws NoSuchElementException
         *             if there are no more tuples to visit.
         */
        public ITuple<E> next() {

            if (!hasNext())
                throw new NoSuchElementException();

            // copy data from the lookahead tuple instance.
            returnValue.copyTuple( nextValue );

            // there is now a current tuple on which remove() can operate.
            hasCurrent = true;

            // one step lookahead.
            nextValue = getNext();

            if(log.isInfoEnabled()) {

                log.info("returning: "+returnValue);

            }

            visit(returnValue);

            // return the private instance containing a copy of the data.
            return returnValue;

        }

        /**
         * Hook for subclasses. This is invoked immediately before
         * {@link #next()} returns. The default implementation is a NOP.
         * 
         * @param tuple
         *            The tuple that will be visited.
         */
        protected void visit(final ITuple<E> tuple) {

            // NOP

        }

        /**
         * Note: {@link #remove()} is supported iff the source iterator is an
         * {@link ITupleCursor} and the underlying {@link AbstractBTree} allows
         * modification.
         * <p>
         * Note: The one-step lookahead imposed by the filter means that
         * invoking {@link #remove()} on the source iterator would cause the
         * wrong element to be removed from the source iterator. Therefore this
         * operation is disabled unless the source iterator is an
         * {@link ITupleCursor}. An {@link ITupleCursor} is safe for traversal
         * with concurrent modification, so we can just remove the key from the
         * source index.
         * <p>
         * Note: An {@link ITupleCursor} can be requested by specifying
         * {@link IRangeQuery#CURSOR} when the source iterator is requested.
         * 
         * @throws UnsupportedOperationException
         *             unless the source iterator is an {@link ITupleCursor}.
         * @throws IllegalStateException
         *             if {@link #next()} has not yet returned a tuple, in
         *             which case there is no current tuple to remove.
         */
        public void remove() {

            if (!(src instanceof ITupleCursor<?>)) {

                /*
                 * The one step lookahead imposed by the filter means that the
                 * source iterator is already positioned on a successor of the
                 * current element. If we were to use [src.remove()] here, it
                 * would cause a successor of the current element to be removed
                 * which is not at all what we want.
                 */

                throw new UnsupportedOperationException(
                        "Source iterator does not implement "
                                + ITupleCursor.class.getName());

            }

            if (!hasCurrent) {

                /*
                 * next() has not copied a tuple into [returnValue] yet (and
                 * [returnValue] is null if the source was empty), so there is
                 * no key which could be removed.
                 */

                throw new IllegalStateException(
                        "next() has not been called");

            }

            /*
             * The ITupleCursor supports traversal with concurrent
             * modification. Therefore we can remove the correct entry from the
             * underlying B+Tree by directing the remove(key) request to the
             * index implementation itself.
             */

            final byte[] key = returnValue.getKey();

            if(log.isInfoEnabled()) {

                log.info("key=" + BytesUtil.toString(key));

            }

            ((ITupleCursor<E>) src).getIndex().remove(key);

        }

        /**
         * One step look ahead. Advances the source iterator until it yields a
         * tuple accepted by {@link TupleFilter#isValid(ITuple)} or is
         * exhausted.
         * 
         * @return The next object to be visited -or- <code>null</code> if the
         *         source iterator is exhausted.
         */
        protected ITuple<E> getNext() {

            while (src.hasNext()) {

                final ITuple<E> next = src.next();

                if (!filter.isValid(next)) {

                    if(log.isInfoEnabled()) {

                        log.info("rejected  : "+next);

                    }

                    continue;

                }

                if(log.isInfoEnabled()) {

                    log.info("will visit: "+next);

                }

                return next;

            }

            if(log.isInfoEnabled()) {

                log.info("Source is exhausted.");

            }

            // source is exhausted.
            return null;

        }

    }

}