/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.data.query.op; import com.addthis.basis.util.LessStrings; import com.addthis.bundle.core.Bundle; import com.addthis.bundle.table.DataTable; import com.addthis.bundle.table.DataTableFactory; import com.addthis.bundle.util.BundleColumnBinder; import com.addthis.bundle.value.ValueObject; import com.addthis.hydra.data.query.AbstractTableOp; import com.addthis.hydra.data.tree.prop.DataSeen; import com.addthis.hydra.store.util.SeenFilterBasic; import io.netty.channel.ChannelProgressivePromise; /** * <p>This query operation <span class="hydra-summary">applies a bloom filter to a column</span>. * <p/> * <p>The syntax for this operation is seen=column:bits:bitsper:hash. 'column' is the column * number. 'bits' is the cardinality of the bloom filter (total number of bits allocated * to the filter). It must be 32 or greater. 'bitsper' is the number of hash function * evaluations for each insertion operation. This parameter is usually referred to as * the "k" parameter in the literature. 'hash' is the type of hash function to apply. * The types of hash functions are listed in {@link SeenFilterBasic#hash hash}. * * @user-reference * @hydra-name seen */ public class OpSeen extends AbstractTableOp { /** * Columns to summarize */ private int column; private int bits; private int bitsper; // defaults to 4 private int hash; // defaults to 4 public OpSeen(DataTableFactory tableFactory, String args, ChannelProgressivePromise queryPromise) { super(tableFactory, queryPromise); String[] v = LessStrings.splitArray(args, ":"); this.column = Integer.parseInt(v[0]); this.bits = Integer.parseInt(v[1]); this.bitsper = v.length > 2 ? Integer.parseInt(v[2]) : 4; this.hash = v.length > 3 ? Integer.parseInt(v[3]) : 4; } @Override public DataTable tableOp(DataTable result) { if (column >= result.size()) { return result; } SeenFilterBasic<String> seen = new SeenFilterBasic<>(bits, bitsper, hash); ValueObject obj = new DataSeen.ValueBloom(seen); BundleColumnBinder binder = getSourceColumnBinder(result); for (Bundle row : result) { seen.setSeen(binder.getColumn(row, column).toString()); } DataTable ret = createTable(1); Bundle row = ret.createBundle(); binder.appendColumn(row, obj); ret.append(row); return ret; } }