package com.bigdata.btree.filter;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.log4j.Logger;
import com.bigdata.btree.AbstractBTree;
import com.bigdata.btree.AbstractTuple;
import com.bigdata.btree.BTree;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleCursor;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.ITupleSerializer;
import com.bigdata.btree.Tuple;
import com.bigdata.util.BytesUtil;
import cutthecrap.utils.striterators.Filter;
import cutthecrap.utils.striterators.FilterBase;
/**
* <p>
* Filter supporting {@link ITupleIterator}s.
* </p>
* <p>
* <strong>Warning: Unlike {@link Filter}, this class correctly uses a second
* {@link Tuple} instance to perform filtering.<strong> This is necessary since
* the {@link Tuple} instance for the base {@link ITupleIterator}
* implementations for the {@link AbstractBTree} is reused by next() on each
* call and the {@link TupleFilter} uses one-step lookahead. Failure to use a
* second {@link Tuple} instance will result in <em>overwrite</em> of the
* current {@link Tuple} with data from the lookahead {@link Tuple}.
* </p>
* <p>
* Note: You must specify {@link IRangeQuery#KEYS} and/or
* {@link IRangeQuery#VALS} in order to filter on the keys and/or values
* associated with the visited tuples.
* </p>
* <p>
* Note: YOu must specify {@link IRangeQuery#CURSOR} to enabled
* {@link Iterator#remove()} for a <em>local</em> {@link BTree}
* </p>
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
* @param <E>
* The type of the elements visited by the iterator (tuples of some
* sort).
*/
abstract public class TupleFilter<E> extends FilterBase implements ITupleFilter<E> {
/**
*
*/
private static final long serialVersionUID = 1L;
private static transient final Logger log = Logger.getLogger(TupleFilter.class);
public TupleFilter() {
// this(null/* state */);
//
// }
//
// public TupleFilter(final Object state) {
//
// super(state);
}
@SuppressWarnings("unchecked")
@Override
public ITupleIterator<E> filterOnce(final Iterator src,Object context) {
return new TupleFilter.TupleFilterator((ITupleIterator) src, context, this);
}
abstract protected boolean isValid(ITuple<E> tuple);
/**
* Implementation class knows how to avoid side-effects from the reuse of
* the same {@link Tuple} instance by the base {@link ITupleIterator} impls.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @param <E>
*/
static public class TupleFilterator<E> implements ITupleIterator<E> {
/**
* The source iterator.
*/
protected final ITupleIterator<E> src;
/**
* The context.
*/
protected final Object context;
protected final TupleFilter<E> filter;
/**
* The next value to be returned by {@link #next()}.
*/
private ITuple<E> nextValue = null;
/**
* The {@link ITuple} instance that will actually be returned to the
* caller. The data from {@link #nextValue} is <em>copied</em> into this
* {@link #returnValue} in order to avoid side-effects from
* {@link #getNext()}. Those side-effects would otherwise arise because
* the base {@link ITupleIterator} implementations reuse the same
* {@link Tuple} instance for each tuple visited by the iterator. A copy
* of the data must be made in order to avoid side-effects from the
* one-step lookahead used by the filter.
*/
final private AbstractTuple<E> returnValue;
public TupleFilterator(final ITupleIterator<E> src, final Object context,
final TupleFilter<E> filter) {
this.src = src;
this.context = context;
this.filter = filter;
/*
* One step lookahead.
*
* Note: This cases to a Tuple because it presumes that we are
* running directly against an AbstractBTree rather than filtering
* tuples buffered in a ResultSet on the client. A class cast
* exception will be thrown in the latter case.
*/
this.nextValue = getNext();
if (this.nextValue != null) {
final int sourceIndex = nextValue.getSourceIndex();
final ITupleSerializer tupleSer = nextValue.getTupleSerializer();
// private buffer used to avoid side-effects from getNext()
this.returnValue = new AbstractTuple<E>(nextValue.flags()) {
public int getSourceIndex() {
return sourceIndex;
}
public ITupleSerializer getTupleSerializer() {
return tupleSer;
}
};
} else {
// nothing to be returned.
this.returnValue = null;
}
}
public boolean hasNext() {
return nextValue != null;
}
// @SuppressWarnings("unchecked")
public ITuple<E> next() {
if (!hasNext())
throw new NoSuchElementException();
// copy data from the lookahead tuple instance.
returnValue.copyTuple( nextValue );
// one step lookahead.
nextValue = getNext();
if(log.isInfoEnabled()) {
log.info("returning: "+returnValue);
}
visit(returnValue);
// return the private instance containing a copy of the data.
return (ITuple<E>) returnValue;
}
/**
* Hook for subclasses. This is invoked immediately before
* {@link #next()} returns. The default implementation is a NOP.
*
* @param tuple
* The tuple that will be visited.
*/
protected void visit(final ITuple<E> tuple) {
// NOP
}
/**
* Note: {@link #remove()} is supported iff the source iterator is an
* {@link ITupleCursor} and the underlying {@link AbstractBTree} allows
* modification.
* <p>
* Note: The filter imposes a one-step lookahead means that invoking
* {@link #remove()} on the source iterator would cause the wrong
* element to be removed from the source iterator. Therefore this
* operation is disabled unless the {@link ITupleCursor}.
* {@link ITupleCursor} is safe for traversal with concurrent
* modification, so we can just remove the key from the source index.
* <p>
* Note: An {@link ITupleCursor} can be requested either at the
* top-level or by specifying {@link IRangeQuery#CURSOR} to
* {@link TupleFilter#TupleFilter(int)}.
*
* @throws UnsupportedOperationException
* unless the source iterator is an {@link ITupleCursor}.
*/
public void remove() {
if(src instanceof ITupleCursor<?>) {
/*
* The ITupleCursor supports traversal with concurrent
* modification. Therefore we can remove the correct entry from
* the underlying B+Tree by directing the remove(key) request to
* the index implementation itself.
*/
final byte[] key = returnValue.getKey();
if(log.isInfoEnabled()) {
log.info("key=" + BytesUtil.toString(key));
}
((ITupleCursor<E>) src).getIndex().remove(key);
} else {
/*
* Otherwise the one step lookahead imposed by the filter means
* that the source iterator is already positioned on a successor
* of the current element. If we were to use [src.remove()]
* here, it would cause a successor of the current element to be
* removed which is not at all what we want.
*/
throw new UnsupportedOperationException(
"Source iterator does not implement "
+ ITupleCursor.class.getName());
}
}
/**
* One step look ahead.
*
* @return The next object to be visited.
*/
protected ITuple<E> getNext() {
while (src.hasNext()) {
final ITuple<E> next = src.next();
if (!filter.isValid(next)) {
if(log.isInfoEnabled()) {
log.info("rejected : "+next);
}
continue;
}
if(log.isInfoEnabled()) {
log.info("will visit: "+next);
}
return next;
}
if(log.isInfoEnabled()) {
log.info("Source is exhausted.");
}
// source is exhausted.
return null;
}
}
}