OpPercentileDistribution.java example

Explorer
hydra-master
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.data.query.op;

import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.core.BundleFactory;
import com.addthis.bundle.core.BundleFormat;
import com.addthis.bundle.util.AutoField;
import com.addthis.bundle.util.IndexField;
import com.addthis.bundle.value.ValueFactory;
import com.addthis.hydra.data.query.AbstractQueryOp;
import com.addthis.hydra.data.util.KeyPercentileDistribution;

import com.yammer.metrics.stats.Snapshot;

import io.netty.channel.ChannelProgressivePromise;

/**
 * <p>This query operation <span class="hydra-summary">calculates the percentile distribution of a column</span>.</p>
 *
 * <p>The syntax for the operation is distribution=[column number],[sample size]. The sample
 * size is optional and the default sample size is 1028. The result of this operation is a table
 * with two columns. Column 0 has the percentile labels (.5, .75, .95, .98, .99, .999) and
 * column 1 has the value of the source column at each of those percentiles.</p>
 *
 * @user-reference
 * @hydra-name distribution
 */
public class OpPercentileDistribution extends AbstractQueryOp {

    /** Sample size used when the operation arguments do not specify one. */
    private static final int DEFAULT_SAMPLE_SIZE = 1028;

    /** Accumulates the values seen in {@link #send(Bundle)}. */
    private final KeyPercentileDistribution histo;
    /** Source column whose values are fed into the distribution. */
    private final AutoField column;
    /** Used to create the output bundles emitted from {@link #sendComplete()}. */
    private final BundleFactory bundleFactory;

    /**
     * Creates the operation from its comma-separated arguments.
     *
     * <p>usage: column, sampleSize</p>
     *
     * @param bundleFactory factory for the output bundles
     * @param args          comma-separated integers: the index of the source column for the
     *                      percentile value, optionally followed by the size of the sample set
     *                      to use when calculating percentiles
     * @param queryPromise  promise handed to the parent operation
     * @throws IllegalArgumentException if no column is given or the sample size is not positive
     */
    public OpPercentileDistribution(BundleFactory bundleFactory, String args, ChannelProgressivePromise queryPromise) {
        super(queryPromise);
        this.bundleFactory = bundleFactory;
        int[] v = csvToInts(args);
        if (v.length < 1) {
            throw new IllegalArgumentException("missing required column");
        }
        column = new IndexField(v[0]);
        int sampleSize = (v.length > 1) ? v[1] : DEFAULT_SAMPLE_SIZE;
        if (sampleSize <= 0) {
            // fail fast with a readable message instead of handing a bad size to the sampler
            throw new IllegalArgumentException("sample size must be positive: " + sampleSize);
        }
        histo = new KeyPercentileDistribution(sampleSize).init();
    }

    /**
     * Feeds the source column value of the bundle into the distribution.
     * Bundles that have no value in the source column are skipped rather than
     * failing the whole query.
     */
    @Override public void send(Bundle bundle) {
        column.getLong(bundle).ifPresent(ev -> histo.update(ev));
    }

    /**
     * Emits one row per reported percentile (label in column 0, value in column 1)
     * to the next operation and then signals completion downstream.
     */
    @Override public void sendComplete() {
        // prep bundle format: the two index fields below need at least two fields to exist
        BundleFormat tableFormat = bundleFactory.createBundle().getFormat();
        ensureMinimumFieldCount(tableFormat, 2);
        AutoField label = new IndexField(0);
        AutoField value = new IndexField(1);
        // output
        Snapshot snapshot = histo.getSnapshot();
        writeLine(label, value, ".5", snapshot.getMedian());
        writeLine(label, value, ".75", snapshot.get75thPercentile());
        writeLine(label, value, ".95", snapshot.get95thPercentile());
        writeLine(label, value, ".98", snapshot.get98thPercentile());
        writeLine(label, value, ".99", snapshot.get99thPercentile());
        writeLine(label, value, ".999", snapshot.get999thPercentile());
        getNext().sendComplete();
    }

    /**
     * Creates a fresh output bundle, stores the label and value in it and sends
     * it to the next operation.
     */
    private void writeLine(AutoField labelField,
                           AutoField valueField,
                           String label,
                           double value) {
        Bundle bundle = bundleFactory.createBundle();
        labelField.setValue(bundle, ValueFactory.create(label));
        valueField.setValue(bundle, ValueFactory.create(value));
        getNext().send(bundle);
    }

    /**
     * Requests uniquely named placeholder fields from the format until it reports
     * at least {@code targetCount} fields. Relies on {@code getField} registering
     * a field that does not exist yet.
     */
    private static void ensureMinimumFieldCount(BundleFormat format, int targetCount) {
        int suffixNum = 0;
        while (format.getFieldCount() < targetCount) {
            format.getField("__op_percent_dist_anon_" + suffixNum);
            suffixNum += 1;
        }
    }
}