/**
* Copyright (C) 2009-2013 FoundationDB, LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.foundationdb.qp.operator;
import com.foundationdb.qp.row.Row;
import com.foundationdb.qp.rowtype.RowType;
import com.foundationdb.server.collation.AkCollator;
import com.foundationdb.server.explain.*;
import com.foundationdb.server.types.value.ValueSource;
import com.foundationdb.server.types.value.ValueSources;
import com.foundationdb.util.ArgumentValidation;
import com.foundationdb.util.BloomFilter;
import com.foundationdb.util.tap.InOutTap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
/**
* <h1>Overview</h1>
* <p/>
* Using_BloomFilter loads a bloom filter for use by Select_BloomFilter.
* <p/>
* <h1>Arguments</h1>
* <p/>
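* <ul>
* <li><b>Operator filterInput:</b> Stream of rows used to load the filter</li>
* <li><b>RowType filterRowType:</b> Row type whose field count determines how many fields of each filterInput row are hashed</li>
* <li><b>long estimatedRowCount:</b> Estimated count of rows from filterInput</li>
* <li><b>int filterBindingPosition:</b> Position in the query bindings that will contain the bloom filter</li>
* <li><b>Operator streamInput:</b> Stream of rows to be filtered</li>
* <li><b>List&lt;AkCollator&gt; collators:</b> Optional per-field collators used when hashing; may be null, otherwise
* must have one entry per field of filterRowType</li>
* </ul>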
* <p/>
* <h1>Behavior</h1>
* <p/>
* When a Using_BloomFilter cursor is opened, all rows from the filterInput operator will be consumed and used to
* load a bloom filter. The bloom filter will be set up to accommodate up to estimatedRowCount rows. If this number
* is exceeded, the filter will be grown and the filterInput will be scanned a second time.
* <p/>
* Besides loading the bloom filter, all operations on a Using_BloomFilter cursor are delegated to the streamInput's
* cursor.
* <p/>
* <h1>Output</h1>
* <p/>
* Output from the streamInput cursor is passed on.
* <p/>
* <h1>Assumptions</h1>
* <p/>
* None.
* <p/>
* <h1>Performance</h1>
* <p/>
* The filterInput stream will be consumed completely each time this operator's cursor is opened. It may be consumed
* twice if estimatedRowCount is too low.
* <p/>
* <h1>Memory Requirements</h1>
* <p/>
* The bloom filter uses memory proportional to the number of rows scanned from filterInput, typically 2-4 bytes per row.
*/
class Using_BloomFilter extends Operator
{
    // Object interface

    /** Returns the simple class name of this operator. */
    @Override
    public String toString()
    {
        return getClass().getSimpleName();
    }

    // Operator interface

    /** Collects derived row types from both the filter input and the stream input. */
    @Override
    public void findDerivedTypes(Set<RowType> derivedTypes)
    {
        filterInput.findDerivedTypes(derivedTypes);
        streamInput.findDerivedTypes(derivedTypes);
    }

    /**
     * Creates the execution cursor. Only the stream input's cursor is created here; the filter
     * input's cursor is created each time the bloom filter is loaded (see {@code Execution.open()}).
     */
    @Override
    protected Cursor cursor(QueryContext context, QueryBindingsCursor bindingsCursor)
    {
        return new Execution(context, streamInput.cursor(context, bindingsCursor));
    }

    /** Returns the two inputs, filter input first, then stream input. */
    @Override
    public List<Operator> getInputOperators()
    {
        return Arrays.asList(filterInput, streamInput);
    }

    /** Describes the plan as the filter input's plan followed by the stream input's plan. */
    @Override
    public String describePlan()
    {
        return String.format("%s\n%s", describePlan(filterInput), describePlan(streamInput));
    }

    // Using_BloomFilter interface

    /**
     * @param filterInput           operator producing the rows that are loaded into the bloom filter; not null
     * @param filterRowType         row type whose field count determines how many fields of each
     *                              filter row are hashed; not null
     * @param estimatedRowCount     initial sizing of the bloom filter; must be >= 0
     * @param filterBindingPosition position in the query bindings at which the loaded filter is stored;
     *                              must be >= 0
     * @param streamInput           operator whose rows are passed through by this operator; not null
     * @param collators             optional per-field collators used when hashing; may be null,
     *                              otherwise its size must equal {@code filterRowType.nFields()}
     */
    public Using_BloomFilter(Operator filterInput,
                             RowType filterRowType,
                             long estimatedRowCount,
                             int filterBindingPosition,
                             Operator streamInput,
                             List<AkCollator> collators)
    {
        ArgumentValidation.notNull("filterInput", filterInput);
        ArgumentValidation.notNull("filterRowType", filterRowType);
        ArgumentValidation.isGTE("estimatedRowCount", estimatedRowCount, 0);
        ArgumentValidation.isGTE("filterBindingPosition", filterBindingPosition, 0);
        ArgumentValidation.notNull("streamInput", streamInput);
        if (collators != null) {
            ArgumentValidation.isEQ("collators length", collators.size(), filterRowType.nFields());
        }
        this.filterInput = filterInput;
        this.filterRowType = filterRowType;
        this.estimatedRowCount = estimatedRowCount;
        this.filterBindingPosition = filterBindingPosition;
        this.streamInput = streamInput;
        this.collators = collators;
    }

    // For use by this class

    /** Returns the collator for field {@code f}, or null when no collators were supplied. */
    private AkCollator collator(int f)
    {
        return collators == null ? null : collators.get(f);
    }

    // Class state

    private static final InOutTap TAP_OPEN = OPERATOR_TAP.createSubsidiaryTap("operator: Using_BloomFilter open");
    private static final InOutTap TAP_NEXT = OPERATOR_TAP.createSubsidiaryTap("operator: Using_BloomFilter next");
    private static final Logger LOG = LoggerFactory.getLogger(Using_BloomFilter.class);
    private static final double ERROR_RATE = 0.0001; // Bloom filter will use 19.17 bits per key

    // Object state

    private final Operator filterInput;
    private final RowType filterRowType;
    private final long estimatedRowCount;
    private final int filterBindingPosition;
    private final Operator streamInput;
    private final List<AkCollator> collators;

    /**
     * Builds the explain-plan node for this operator: its name, the binding position of the
     * filter, and the explainers of both inputs.
     */
    @Override
    public CompoundExplainer getExplainer(ExplainContext context) {
        Attributes atts = new Attributes();
        atts.put(Label.NAME, PrimitiveExplainer.getInstance(getName()));
        atts.put(Label.BINDING_POSITION, PrimitiveExplainer.getInstance(filterBindingPosition));
        // Both inputs are registered under the same label, filter input first.
        // NOTE(review): assumes Attributes keeps multiple values per label -- confirm.
        atts.put(Label.INPUT_OPERATOR, filterInput.getExplainer(context));
        atts.put(Label.INPUT_OPERATOR, streamInput.getExplainer(context));
        return new CompoundExplainer(Type.BLOOM_FILTER, atts);
    }

    // Inner classes

    /**
     * Cursor that loads the bloom filter into the bindings when opened and otherwise
     * passes rows from the stream input's cursor through unchanged.
     */
    private class Execution extends ChainedCursor
    {
        // Cursor interface

        @Override
        public void open()
        {
            TAP_OPEN.in();
            try {
                // Usually super.open called first, but needs to be done
                // opposite order here to allow Select_BloomFilter access
                // to the filled BloomFilter in the bindings.
                BloomFilter filter = loadBloomFilter();
                bindings.setBloomFilter(filterBindingPosition, filter);
                super.open();
            } finally {
                TAP_OPEN.out();
            }
        }

        @Override
        public Row next()
        {
            if (TAP_NEXT_ENABLED) {
                TAP_NEXT.in();
            }
            try {
                Row output = input.next();
                if (LOG_EXECUTION) {
                    LOG.debug("Using_BloomFilter: yield {}", output);
                }
                return output;
            } finally {
                if (TAP_NEXT_ENABLED) {
                    TAP_NEXT.out();
                }
            }
        }

        // Execution interface

        Execution(QueryContext context, Cursor input)
        {
            super(context, input);
        }

        // For use by this class

        /**
         * Loads a bloom filter sized for estimatedRowCount. If the filter input yields more
         * rows than estimated, a new filter sized for the actual row count is created and the
         * filter input is scanned a second time.
         */
        private BloomFilter loadBloomFilter()
        {
            BloomFilter filter = new BloomFilter(estimatedRowCount, ERROR_RATE);
            long rows = loadBloomFilter(filter);
            if (rows > estimatedRowCount) {
                // Do it again, but size the filter based on the actual row count
                filter = new BloomFilter(rows, ERROR_RATE);
                loadBloomFilter(filter);
            }
            return filter;
        }

        /**
         * Scans the filter input once, adding one hash per row to {@code filter}.
         *
         * @return the number of rows scanned
         */
        private long loadBloomFilter(BloomFilter filter)
        {
            int fields = filterRowType.nFields();
            // Counted as long: the result is compared against the long estimatedRowCount, and an
            // int counter would wrap negative on very large inputs, hiding an undersized filter.
            long rows = 0;
            QueryBindingsCursor bindingsCursor = new SingletonQueryBindingsCursor(bindings);
            Cursor loadCursor = filterInput.cursor(context, bindingsCursor);
            // Opened outside the try so that a failed open is not followed by a close.
            loadCursor.openTopLevel();
            try {
                Row row;
                while ((row = loadCursor.next()) != null) {
                    // Row hash is the XOR of the per-field hashes.
                    // NOTE(review): presumably must match the hash computed by the probing
                    // side (Select_BloomFilter) -- verify before changing.
                    int h = 0;
                    for (int f = 0; f < fields; f++) {
                        ValueSource valueSource = row.value(f);
                        h ^= ValueSources.hash(valueSource, collator(f));
                    }
                    filter.add(h);
                    rows++;
                }
            } finally {
                // Close the filter-side cursor even if the scan fails part-way.
                loadCursor.closeTopLevel();
            }
            return rows;
        }
    }
}