/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.data.query.op; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import com.addthis.bundle.core.Bundle; import com.addthis.bundle.core.BundleField; import com.addthis.bundle.table.DataTable; import com.addthis.bundle.table.DataTableFactory; import com.addthis.bundle.util.BundleColumnBinder; import com.addthis.bundle.value.ValueObject; import com.addthis.hydra.data.query.AbstractTableOp; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import io.netty.channel.ChannelProgressivePromise; /** * <p>This query operation <span class="hydra-summary">removes (key, value) pairs * that are observed once</span>. * <p/> * <p>The syntax for this operation is rmsing=N:M where N is the column number for * the key column and M is the column number for the value column. Each (key, value) * pair that exists only once in the output rows is removed. * * @user-reference * @hydra-name rmsing */ public class OpRemoveSingletons extends AbstractTableOp { private Logger log = LoggerFactory.getLogger(OpRemoveSingletons.class); int keyColumn; int valColumn; public OpRemoveSingletons(DataTableFactory factory, String args, ChannelProgressivePromise queryPromise) { super(factory, queryPromise); try { String[] opt = args.split(":"); keyColumn = opt.length >= 1 ? Integer.parseInt(opt[0]) : 0; valColumn = opt.length >= 2 ? Integer.parseInt(opt[1]) : 1; } catch (Exception ex) { log.warn("", ex); } } /** * Strip out rows with keys that map to a single value * * @param result Input data table * @return Final data table */ @Override public DataTable tableOp(final DataTable result) { if (result == null || result.size() == 0) { return result; } Long[] data = new Long[result.size()]; BundleField[] fields = new BundleColumnBinder(result.get(0)).getFields(); BundleField keyField = fields[keyColumn]; BundleField valField = fields[valColumn]; ValueObject oldKey = null; Set<String> vals = new HashSet<>(); List<Bundle> rows = new ArrayList<>(); DataTable table = createTable(0); for (int i = 0; i < result.size(); i++) { Bundle row = result.get(i); ValueObject newKey = row.getValue(keyField); if (oldKey == null) { oldKey = newKey; } else if ((!oldKey.equals(newKey)) || i == result.size() - 1) { if (i == result.size() - 1) { rows.add(row); } if (vals.size() > 1) { for (Bundle storedRow : rows) { table.append(storedRow); } } oldKey = newKey; vals.clear(); rows.clear(); } rows.add(row); String newVal = row.getValue(valField).asString().toString(); vals.add(newVal); } return table; } }