/**
 * diqube: Distributed Query Base.
 *
 * Copyright (C) 2015 Bastian Gloeckle
 *
 * This file is part of diqube.
 *
 * diqube is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.diqube.function.aggregate;

import java.nio.ByteBuffer;

import org.diqube.data.column.ColumnType;
import org.diqube.function.AggregationFunction;
import org.diqube.function.Function;
import org.diqube.function.FunctionException;
import org.diqube.function.aggregate.result.IntermediaryResultValueIterator;
import org.diqube.function.aggregate.result.IntermediaryResultValueSink;
import org.diqube.function.aggregate.util.SerializedAVLTreeDigest;

import com.tdunning.math.stats.AVLTreeDigest;

/**
 * Approximates a custom quantile of DOUBLE values using t-digest.
 *
 * <p>
 * The quantile to calculate is provided as constant parameter with index 0 and has to be a value in [0, 1], e.g.
 * <code>quantile(0.25, X)</code> for the 25% quantile.
 *
 * <p>
 * The intermediary result written by {@link #populateIntermediary(IntermediaryResultValueSink)} consists of a
 * {@link Long} flag (1 = "complete", 0 = "not complete"), followed by a {@link SerializedAVLTreeDigest} only if the flag
 * is 1. {@link #addIntermediary(IntermediaryResultValueIterator)} reads the same layout.
 *
 * @author Bastian Gloeckle
 */
@Function(name = QuantileDoubleFunction.NAME)
public class QuantileDoubleFunction implements AggregationFunction<Double, Double> {
  public static final String NAME = "quantile";

  /** Compression parameter passed to the {@link AVLTreeDigest} constructor. */
  private static final double TDIGEST_COMPRESSION = 100.;

  /** The quantile to calculate; <code>null</code> until constant parameter 0 has been provided. */
  private Double quantile = null;

  /** Digest collecting both the raw values and the merged digests of other instances. */
  private final AVLTreeDigest tdigest = new AVLTreeDigest(TDIGEST_COMPRESSION);

  /** Whether the most recent call to {@link #addValues(ValueProvider)} reported the final set of values. */
  private boolean complete = false;

  @Override
  public String getNameLowerCase() {
    return NAME;
  }

  /**
   * Accepts the constant parameters of this function. Only the parameter at index 0 (the quantile) is stored; other
   * indices are type-checked but otherwise ignored.
   *
   * @param idx
   *          Index of the constant parameter.
   * @param value
   *          Value of the parameter, has to be a {@link Double}.
   * @throws FunctionException
   *           If the value is not a {@link Double} or if the quantile (index 0) is NaN or not within [0, 1].
   */
  @Override
  public void provideConstantParameter(int idx, Object value) throws FunctionException {
    if (!(value instanceof Double)) {
      throw new FunctionException("Parameter to " + NAME + " function can be a DOUBLE only.");
    }

    if (idx == 0) {
      double requestedQuantile = (Double) value;
      // Reject invalid quantiles right away with the checked exception callers expect, instead of failing later with
      // an unchecked exception (or a meaningless result for NaN) when the digest is queried.
      if (Double.isNaN(requestedQuantile) || requestedQuantile < 0. || requestedQuantile > 1.) {
        throw new FunctionException(
            "Quantile parameter of " + NAME + " function must be within [0, 1], but was " + requestedQuantile + ".");
      }
      quantile = requestedQuantile;
    }
  }

  /**
   * Adds all values of the provider to the digest and remembers whether the provider reported these to be the final
   * set of values.
   */
  @Override
  public void addValues(ValueProvider<Double> valueProvider) {
    for (Double value : valueProvider.getValues()) {
      tdigest.add(value);
    }
    complete = valueProvider.isFinalSetOfValues();
  }

  /**
   * Merges the intermediary result of another instance into this one, but only if that result is flagged "complete".
   */
  @Override
  public void addIntermediary(IntermediaryResultValueIterator intermediary) {
    Long otherComplete = (Long) intermediary.next();
    if (otherComplete == 1L) {
      // only add state if we received some state where the other instance was "complete", i.e. this is the final result
      // of a single other instance of this function. We will not have to remove this state again (we wouldn't be able
      // to anyway).
      SerializedAVLTreeDigest serialized = (SerializedAVLTreeDigest) intermediary.next();
      AVLTreeDigest other = AVLTreeDigest.fromBytes(ByteBuffer.wrap(serialized.getSerialized()));
      tdigest.add(other);
    }
  }

  @Override
  public void removeIntermediary(IntermediaryResultValueIterator intermediary) {
    // noop, tdigest does not support removing state.
  }

  /**
   * Writes the intermediary result: a flag of 0 if this instance is not "complete", otherwise a flag of 1 followed by
   * the compressed, serialized digest.
   *
   * @throws FunctionException
   *           If no quantile has been provided.
   */
  @Override
  public void populateIntermediary(IntermediaryResultValueSink res) throws FunctionException {
    ensureQuantilePresent();

    if (!complete) {
      res.pushValue(Long.valueOf(0L));
      return;
    }

    res.pushValue(Long.valueOf(1L));

    tdigest.compress();
    ByteBuffer buf = ByteBuffer.allocate(tdigest.byteSize());
    tdigest.asSmallBytes(buf);

    // Copy exactly the bytes that were written: flip() limits the buffer to what was written and rewinds it.
    buf.flip();
    byte[] serialized = new byte[buf.remaining()];
    buf.get(serialized);

    res.pushValue(new SerializedAVLTreeDigest(serialized));
  }

  /**
   * Queries the digest for the requested quantile.
   *
   * @return The approximated quantile, or -1 if the digest reports NaN (presumably when it holds no values - confirm
   *         against the t-digest implementation).
   * @throws FunctionException
   *           If no quantile has been provided.
   */
  @Override
  public Double calculate() throws FunctionException {
    ensureQuantilePresent();

    double res = tdigest.quantile(quantile);
    if (Double.isNaN(res)) {
      return -1.;
    }
    return res;
  }

  /**
   * @throws FunctionException
   *           If the quantile has not been provided via {@link #provideConstantParameter(int, Object)}.
   */
  private void ensureQuantilePresent() throws FunctionException {
    if (quantile == null) {
      throw new FunctionException("Which quantile should be calculated? Use constant paramater to specify. "
          + "Example: quantile(0.25, X) to calculate the 25% quantile.");
    }
  }

  @Override
  public ColumnType getOutputType() {
    return ColumnType.DOUBLE;
  }

  /**
   * @return {@link ColumnType#DOUBLE}, matching the {@link Double} input type parameter of this function and the
   *         {@link ValueProvider} of {@link Double}s consumed by {@link #addValues(ValueProvider)}.
   */
  @Override
  public ColumnType getInputType() {
    return ColumnType.DOUBLE;
  }

  @Override
  public boolean needsActualValues() {
    return true;
  }
}