/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.query.op;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import com.addthis.basis.util.LessStrings;
import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.core.BundleField;
import com.addthis.bundle.table.DataTable;
import com.addthis.bundle.table.DataTableFactory;
import com.addthis.bundle.util.BundleColumnBinder;
import com.addthis.bundle.util.ValueUtil;
import com.addthis.bundle.value.ValueFactory;
import com.addthis.hydra.data.query.AbstractTableOp;
import com.google.common.base.Objects;
import io.netty.channel.ChannelProgressivePromise;
/**
 * Treats the rows of a table as entries of a frequency table and computes
 * percentiles (and optionally the total count) per group of rows.
 * <p>
 * The argument string has three sections separated by {@code ':'}:
 * <pre>
 *   keyCol[,keyCol...] : valueCol,freqCol : item[,item...]
 * </pre>
 * <ul>
 * <li>the first section lists the key columns handed to {@link BundleColumnBinder};</li>
 * <li>the second section gives the integer column holding the value and the
 *     integer column holding how often that value occurred;</li>
 * <li>the third section lists the outputs: the literal {@code total} requests
 *     the total frequency, every other item is parsed as a double and
 *     passed to {@link FTable#getNearestPercentile(double)} (e.g. {@code 0.99}).</li>
 * </ul>
 * Only <em>adjacent</em> rows with equal key columns are merged into one
 * group; the first row of each group is reused as the output row.
 */
public class OpFrequencyTable extends AbstractTableOp {

    /** Names of the key columns that define a group. */
    private String[] cols;
    /** Column index (looked up by its decimal string name) holding the observed value. */
    private int valueIndex;
    /** Column index (looked up by its decimal string name) holding the frequency of the value. */
    private int freqIndex;
    /** Requested percentiles, in the order they were given; never contains holes. */
    private double[] desiredPercentiles = new double[0];
    /** Whether the total frequency of a group is appended before the percentiles. */
    private boolean appendTotal = false;

    /**
     * Parses the operation arguments, e.g. {@code 0,1,2,3:4,5:0.99,total}.
     *
     * @param processor    factory used by the parent class to create tables
     * @param args         argument string in the format described on the class
     * @param queryPromise promise forwarded to the parent class
     * @throws IllegalArgumentException if a section of {@code args} is missing
     * @throws NumberFormatException    if a column index or percentile is not numeric
     */
    public OpFrequencyTable(DataTableFactory processor, String args, ChannelProgressivePromise queryPromise) {
        super(processor, queryPromise);
        String[] tuple = LessStrings.splitArray(args, ":");
        if (tuple.length < 3) {
            throw new IllegalArgumentException(
                    "expected <keys>:<value>,<freq>:<percentiles> but got: " + args);
        }
        cols = LessStrings.splitArray(tuple[0], ",");
        String[] valueAndFreq = LessStrings.splitArray(tuple[1], ",");
        if (valueAndFreq.length < 2) {
            throw new IllegalArgumentException(
                    "expected <value>,<freq> column indexes but got: " + tuple[1]);
        }
        valueIndex = Integer.parseInt(valueAndFreq[0]);
        freqIndex = Integer.parseInt(valueAndFreq[1]);
        List<Double> percentiles = new ArrayList<>();
        for (String item : LessStrings.splitArray(tuple[2], ",")) {
            if ("total".equals(item)) {
                appendTotal = true;
            } else {
                percentiles.add(Double.valueOf(item));
            }
        }
        // Copy into a right-sized primitive array. Passing a pre-sized Double[1]
        // to toArray() left a null slot when no percentile was requested, which
        // later failed with a NullPointerException on unboxing.
        desiredPercentiles = new double[percentiles.size()];
        for (int i = 0; i < desiredPercentiles.length; i++) {
            desiredPercentiles[i] = percentiles.get(i);
        }
    }

    /**
     * Collapses each run of consecutive rows with equal key columns into one
     * output row. The output row is the first row of the run with the value
     * and frequency columns removed and the total (if requested) plus one
     * column per requested percentile appended.
     *
     * @param result input rows; rows of the same group must be adjacent to be merged
     * @return a new table with one row per group, empty if {@code result} is empty
     */
    @Override
    public DataTable tableOp(DataTable result) {
        DataTable table = createTable(0);
        int rowCount = result.size();
        if (rowCount == 0) {
            return table;
        }
        Bundle onDeck = result.get(0);
        FTable recentFTable = new FTable();
        for (int i = 0; i < rowCount; i++) {
            boolean lastRow = i == rowCount - 1;
            Bundle row = result.get(i);
            BundleColumnBinder rowBinder = new BundleColumnBinder(row, cols);
            BundleColumnBinder deckBinder = new BundleColumnBinder(onDeck);
            BundleField[] keyColumns = rowBinder.getFields();
            BundleField valueColumn = row.getFormat().getField(Integer.toString(valueIndex));
            BundleField freqColumn = row.getFormat().getField(Integer.toString(freqIndex));
            // A row always belongs to its own group. Without the identity check a
            // first row with a null key compared unequal to itself, was emitted
            // with empty statistics and then emitted a second time.
            boolean sameGroup = row == onDeck || equalColumnKeys(keyColumns, onDeck, row);
            if (!sameGroup) {
                // Key changed: flush the finished group and start a new one at this row.
                emitGroup(deckBinder, onDeck, valueColumn, freqColumn, recentFTable, table);
                recentFTable = new FTable();
                onDeck = row;
            }
            recentFTable.update(readLong(row, valueColumn), readLong(row, freqColumn));
            if (lastRow) {
                // No further row will trigger a flush, so emit the open group now.
                emitGroup(deckBinder, onDeck, valueColumn, freqColumn, recentFTable, table);
            }
        }
        return table;
    }

    /** Reads a column of a row as a long, parsing it as a base-10 number if needed. */
    private static long readLong(Bundle row, BundleField column) {
        return ValueUtil.asNumberOrParseLong(row.getValue(column), 10).asLong().getLong();
    }

    /**
     * Turns the first row of a finished group into its output row and appends it to {@code out}:
     * drops the value/frequency columns, then appends the total (if requested) and the percentiles.
     */
    private void emitGroup(BundleColumnBinder binder, Bundle group, BundleField valueColumn,
            BundleField freqColumn, FTable stats, DataTable out) {
        group.removeValue(valueColumn);
        group.removeValue(freqColumn);
        if (appendTotal) {
            binder.appendColumn(group, ValueFactory.create(stats.getTotalEntries()));
        }
        for (double wanted : desiredPercentiles) {
            long percentile = stats.getNearestPercentile(wanted);
            binder.appendColumn(group, ValueFactory.create(percentile));
        }
        out.append(group);
    }

    /**
     * Compares two bundles on the given key columns using the native string
     * form of each value.
     *
     * @param keyColumns columns to compare
     * @param oldb       first bundle
     * @param newb       second bundle
     * @return {@code true} only if every key column is non-null in both bundles
     *         and the string forms are equal; a null on either side counts as a
     *         mismatch (so two nulls are not considered equal)
     */
    public static boolean equalColumnKeys(BundleField[] keyColumns, Bundle oldb, Bundle newb) {
        for (BundleField keyColumn : keyColumns) {
            String oldstr = ValueUtil.asNativeString(oldb.getValue(keyColumn));
            String newstr = ValueUtil.asNativeString(newb.getValue(keyColumn));
            if (oldstr == null || newstr == null || !oldstr.equals(newstr)) {
                return false;
            }
        }
        return true;
    }

    // todo: floats, ints and doubles too?
    /**
     * A frequency table over long values: maps each value to the number of
     * times it was observed and keeps the values in ascending order.
     */
    public static class FTable {

        /** Sum of all frequencies passed to {@link #update(Long, Long)}. */
        private long totalEntries;
        // todo: use some fancy primitive collection
        /** Value to accumulated frequency, sorted by value. */
        private final SortedMap<Long, Long> freqMap;

        /** Creates an empty frequency table. */
        public FTable() {
            totalEntries = 0;
            freqMap = new TreeMap<>();
        }

        /**
         * Adds {@code freq} observations of {@code value}.
         *
         * @param value the observed value
         * @param freq  how many times it was observed
         */
        public void update(Long value, Long freq) {
            Long seen = freqMap.get(value);
            if (seen == null) {
                freqMap.put(value, freq);
            } else {
                freqMap.put(value, seen + freq);
            }
            totalEntries += freq;
        }

        /** @return the sum of all frequencies recorded so far */
        public long getTotalEntries() {
            return totalEntries;
        }

        // todo: mean of odd values?
        /**
         * Finds the smallest value whose cumulative frequency reaches the
         * requested rank.
         *
         * @param p the percentile as a fraction of the total count (e.g. {@code 0.99})
         * @return the first value, in ascending order, whose cumulative frequency is
         *         at least {@code round(p * totalEntries)} (clamped to zero), or
         *         {@code -1} if no value reaches that rank (for instance when the
         *         table is empty)
         */
        public long getNearestPercentile(double p) {
            long pindex = Math.max(0, Math.round(p * totalEntries));
            long cumulative = 0;
            for (Map.Entry<Long, Long> entry : freqMap.entrySet()) {
                cumulative += entry.getValue();
                if (cumulative >= pindex) {
                    return entry.getKey();
                }
            }
            return -1; // rank not reachable with the recorded entries
        }

        @Override
        public String toString() {
            return Objects.toStringHelper(this)
                    .add("totalEntries", totalEntries)
                    .add("freqMap", freqMap)
                    .toString();
        }
    }
}