package prefuse.data.util; import java.util.Comparator; import java.util.Iterator; import prefuse.data.Table; import prefuse.data.expression.AndPredicate; import prefuse.data.expression.ColumnExpression; import prefuse.data.expression.ComparisonPredicate; import prefuse.data.expression.Expression; import prefuse.data.expression.ExpressionAnalyzer; import prefuse.data.expression.NotPredicate; import prefuse.data.expression.OrPredicate; import prefuse.data.expression.Predicate; import prefuse.data.expression.RangePredicate; import prefuse.data.tuple.TupleSet; import prefuse.util.PrefuseConfig; import prefuse.util.collections.CompositeIntIterator; import prefuse.util.collections.IntIterator; /** * Factory class that creates optimized filter iterators. When possible, * this factory will attempt to create an optimized query plan by using * available indexes, in many incrasing performance by only visiting * the tuples which will pass the filter condition. * * @author <a href="http://jheer.org">jeffrey heer</a> */ public class FilterIteratorFactory { private static final int OPTIMIZATION_THRESHOLD = PrefuseConfig.getInt("data.filter.optimizeThreshold"); // we can stash our query plan generation and optimization here to deal // with it all in one spot, and keep the rest of the classes clean /** * Get a filtered iterator over the tuples in the given set, * filtered by the given predicate. * @param ts the TupleSet to iterate over * @param p the filter predicate * @return a filtered iterator over the tuples */ public static Iterator tuples(TupleSet ts, Predicate p) { // no predicate means no filtering if ( p == null ) return ts.tuples(); // attempt to generate an optimized query plan Iterator iter = null; if ( ts instanceof Table ) { Table t = (Table)ts; IntIterator ii = getOptimizedIterator(t,p); if ( ii != null ) iter = t.tuples(ii); } // optimization fails, scan the entire table if ( iter == null ) { iter = new FilterIterator(ts.tuples(), p); } return iter; } /** * Get a filtered iterator over the rows in the given table, * filtered by the given predicate. * @param t the Table to iterate over * @param p the filter predicate * @return a filtered iterator over the table rows */ public static IntIterator rows(Table t, Predicate p) { // attempt to generate an optimized query plan IntIterator iter = null; iter = getOptimizedIterator(t, p); // optimization fails, scan the entire table if ( iter == null ) { iter = new FilterRowIterator(t.rows(), t, p); } return iter; } /** * Get an optimized iterator over the rows of a table, if possible. * @param t the Table to iterator over * @param p the filter predicate * @return an optimized iterator, or null if no optimization was found */ protected static IntIterator getOptimizedIterator(Table t, Predicate p) { if ( t.getRowCount() < OPTIMIZATION_THRESHOLD ) return null; // avoid overhead for small tables if ( p instanceof ColumnExpression ) { // try to optimize a boolean column return getColumnIterator(t, ((ColumnExpression)p).getColumnName(), true); } else if ( p instanceof NotPredicate ) { // try to optimize the negation a boolean column Predicate pp = ((NotPredicate)p).getPredicate(); if ( pp instanceof ColumnExpression ) { return getColumnIterator(t, ((ColumnExpression)pp).getColumnName(), false); } } else if ( p instanceof AndPredicate ) { // try to optimize an and clause return getAndIterator(t, (AndPredicate)p); } else if ( p instanceof OrPredicate ) { // try to optimize an or clause return getOrIterator(t, (OrPredicate)p); } else if ( p instanceof ComparisonPredicate ) { // try to optimize a comparison (=, !=, <, > ,etc) return getComparisonIterator(t,(ComparisonPredicate)p); } else if ( p instanceof RangePredicate ) { // try to optimize a bounded range of values return getRangeIterator(t, (RangePredicate)p); } return null; } protected static IntIterator getColumnIterator( Table t, String field, boolean val) { if ( t.getColumnType(field) != boolean.class ) return null; // only works for boolean-valued columns Index index = t.getIndex(field); if ( index == null ) { return null; } else { return index.rows(val); } } protected static IntIterator getOrIterator(Table t, OrPredicate op) { int size = op.size(); if ( size > 1 ) { // if all subclauses can be optimized, we can optimize the query IntIterator[] rows = new IntIterator[size]; for ( int i=0; i<rows.length; ++i ) { rows[i] = getOptimizedIterator(t, op.get(i)); // all clauses must be optimized to avoid linear scan if ( rows[i] == null ) return null; } // group iterators, and filter for uniqueness return new UniqueRowIterator(new CompositeIntIterator(rows)); } else if ( size == 1 ) { // only one clause, optimize for that return getOptimizedIterator(t, op.get(0)); } else { // no woman, no cry return null; } } protected static IntIterator getAndIterator(Table t, AndPredicate ap) { // possible TODO: add scoring to select best optimized iterator // for now just work from the end backwards and take the first // optimized iterator we find IntIterator rows = null; Predicate clause = null; for ( int i=ap.size(); --i >= 0; ) { clause = ap.get(i); if ( (rows=getOptimizedIterator(t,clause)) != null ) break; } // exit if we didn't optimize if ( rows == null ) return null; // if only one clause, no extras needed if ( ap.size() == 1 ) return rows; // otherwise get optimized source, run through other clauses return new FilterRowIterator(rows, t, ap.getSubPredicate(clause)); } protected static IntIterator getComparisonIterator(Table t, ComparisonPredicate cp) { Expression l = cp.getLeftExpression(); Expression r = cp.getRightExpression(); int operation = cp.getOperation(); // not equals operations aren't handled by the index if ( operation == ComparisonPredicate.NEQ ) return null; ColumnExpression col; Expression lit; // make sure columns are of the right type if (l instanceof ColumnExpression && !ExpressionAnalyzer.hasDependency(r)) { col = (ColumnExpression)l; lit = r; } else if (r instanceof ColumnExpression && !ExpressionAnalyzer.hasDependency(l)) { col = (ColumnExpression)r; lit = l; } else { return null; } // if table has index of the right type, use it Comparator cmp = cp.getComparator(); Index index = t.getIndex(col.getColumnName()); if ( index == null || !cmp.equals(index.getComparator()) ) return null; Class ltype = lit.getClass(); if ( ltype == int.class ) { int val = lit.getInt(null); // literal value, so null is safe switch ( operation ) { case ComparisonPredicate.LT: return index.rows(Integer.MIN_VALUE, val, Index.TYPE_AIE); case ComparisonPredicate.GT: return index.rows(val, Integer.MAX_VALUE, Index.TYPE_AEI); case ComparisonPredicate.EQ: return index.rows(val, val, Index.TYPE_AII); case ComparisonPredicate.LTEQ: return index.rows(Integer.MIN_VALUE, val, Index.TYPE_AII); case ComparisonPredicate.GTEQ: return index.rows(val, Integer.MAX_VALUE, Index.TYPE_AII); default: throw new IllegalStateException(); // should never occur } } else if ( ltype == long.class ) { long val = lit.getLong(null); // literal value, so null is safe switch ( operation ) { case ComparisonPredicate.LT: return index.rows(Long.MIN_VALUE, val, Index.TYPE_AIE); case ComparisonPredicate.GT: return index.rows(val, Long.MAX_VALUE, Index.TYPE_AEI); case ComparisonPredicate.EQ: return index.rows(val, val, Index.TYPE_AII); case ComparisonPredicate.LTEQ: return index.rows(Long.MIN_VALUE, val, Index.TYPE_AII); case ComparisonPredicate.GTEQ: return index.rows(val, Long.MAX_VALUE, Index.TYPE_AII); default: throw new IllegalStateException(); // should never occur } } else if ( ltype == float.class ) { float val = lit.getFloat(null); // literal value, so null is safe switch ( operation ) { case ComparisonPredicate.LT: return index.rows(Float.MIN_VALUE, val, Index.TYPE_AIE); case ComparisonPredicate.GT: return index.rows(val, Float.MAX_VALUE, Index.TYPE_AEI); case ComparisonPredicate.EQ: return index.rows(val, val, Index.TYPE_AII); case ComparisonPredicate.LTEQ: return index.rows(Float.MIN_VALUE, val, Index.TYPE_AII); case ComparisonPredicate.GTEQ: return index.rows(val, Float.MAX_VALUE, Index.TYPE_AII); default: throw new IllegalStateException(); // should never occur } } else if ( ltype == double.class ) { double val = lit.getDouble(null); // literal value, so null is safe switch ( operation ) { case ComparisonPredicate.LT: return index.rows(Double.MIN_VALUE, val, Index.TYPE_AIE); case ComparisonPredicate.GT: return index.rows(val, Double.MAX_VALUE, Index.TYPE_AEI); case ComparisonPredicate.EQ: return index.rows(val, val, Index.TYPE_AII); case ComparisonPredicate.LTEQ: return index.rows(Double.MIN_VALUE, val, Index.TYPE_AII); case ComparisonPredicate.GTEQ: return index.rows(val, Double.MAX_VALUE, Index.TYPE_AII); default: throw new IllegalStateException(); // should never occur } } else { Object val = lit.get(null); // literal value, so null is safe switch ( operation ) { case ComparisonPredicate.LT: return index.rows(null, val, Index.TYPE_AIE); case ComparisonPredicate.GT: return index.rows(val, null, Index.TYPE_AEI); case ComparisonPredicate.EQ: return index.rows(val, val, Index.TYPE_AII); case ComparisonPredicate.LTEQ: return index.rows(null, val, Index.TYPE_AII); case ComparisonPredicate.GTEQ: return index.rows(val, null, Index.TYPE_AII); default: throw new IllegalStateException(); // should never occur } } } protected static IntIterator getRangeIterator(Table t, RangePredicate rp) { ColumnExpression col; Expression l, r; // make sure columns are of the right type if ( !(rp.getMiddleExpression() instanceof ColumnExpression) || ExpressionAnalyzer.hasDependency(rp.getLeftExpression()) || ExpressionAnalyzer.hasDependency(rp.getRightExpression()) ) { return null; } // assign variables col = (ColumnExpression)rp.getMiddleExpression(); l = rp.getLeftExpression(); r = rp.getRightExpression(); // if table has index of the right type, use it Comparator cmp = rp.getComparator(); Index index = t.getIndex(col.getColumnName()); if ( index == null || !cmp.equals(index.getComparator()) ) return null; int operation = rp.getOperation(); Class ltype = t.getColumnType(col.getColumnName()); // TODO safety check literal types // get the index type int indexType; switch ( operation ) { case RangePredicate.IN_IN: indexType = Index.TYPE_AII; break; case RangePredicate.IN_EX: indexType = Index.TYPE_AIE; break; case RangePredicate.EX_IN: indexType = Index.TYPE_AEI; break; case RangePredicate.EX_EX: indexType = Index.TYPE_AEE; break; default: throw new IllegalStateException(); // should never occur } // get the indexed rows if ( ltype == int.class ) { return index.rows(l.getInt(null), r.getInt(null), indexType); } else if ( ltype == long.class ) { return index.rows(l.getLong(null), r.getLong(null), indexType); } else if ( ltype == float.class ) { return index.rows(l.getFloat(null), r.getFloat(null), indexType); } else if ( ltype == double.class ) { return index.rows(l.getDouble(null), r.getDouble(null), indexType); } else { return index.rows(l.get(null), r.get(null), indexType); } } } // end of class FilterIteratorFactory