/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.data.tree.prop;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import java.nio.ByteBuffer;

import com.addthis.basis.util.LessStrings;

import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.core.BundleField;
import com.addthis.bundle.util.ValueUtil;
import com.addthis.bundle.value.AbstractCustom;
import com.addthis.bundle.value.Numeric;
import com.addthis.bundle.value.ValueArray;
import com.addthis.bundle.value.ValueBytes;
import com.addthis.bundle.value.ValueDouble;
import com.addthis.bundle.value.ValueFactory;
import com.addthis.bundle.value.ValueLong;
import com.addthis.bundle.value.ValueMap;
import com.addthis.bundle.value.ValueObject;
import com.addthis.bundle.value.ValueSimple;
import com.addthis.bundle.value.ValueString;
import com.addthis.bundle.value.ValueTranslationException;
import com.addthis.codec.annotations.FieldConfig;
import com.addthis.codec.codables.SuperCodable;
import com.addthis.hydra.data.tree.DataTreeNode;
import com.addthis.hydra.data.tree.DataTreeNodeUpdater;
import com.addthis.hydra.data.tree.TreeDataParameters;
import com.addthis.hydra.data.tree.TreeNodeData;

import com.clearspring.analytics.stream.quantile.TDigest;

public class DataTDigest extends TreeNodeData<DataTDigest.Config> implements SuperCodable {

    /**
     * <p>This data attachment is a <span class="hydra-summary">TDigest attached to a node</span>.</p>
     *
     * <p>
     * Adaptive histogram based on something like streaming k-means crossed with Q-digest.
     * <p/>
     * The special characteristics of this algorithm are:
     * <p/>
     * a) smaller summaries than Q-digest
     * <p/>
     * b) works on doubles as well as integers
     * <p/>
     * c) provides part-per-million accuracy for extreme quantiles and typically &lt;1000 ppm
     * accuracy for middle quantiles
     * </p>
     *
     * <p>The {@link #key key} field is required and specifies the bundle field name from which
     * keys will be inserted into the digest. The {@link #compression compression} field is an
     * optional parameter that specifies the trade-off between accuracy and size. The default
     * value is 100. Larger values produce a more accurate digest but also a larger serialized
     * object.
     * </p>
     *
     * <p><b>Job Configuration Example</b>
     * <pre>
     * {counts {value:"service", data.timeDigest.tdigest {key:"TIME", compression:100}}}
     * </pre>
     * </p>
     *
     * <p><b>Query Path Directives</b></p>
     *
     * <pre>"$" operations support the following commands in the format $+{attachment}={command}:
     *   cdf(x)      : the fraction of inserted values that are less than or equal to x
     *                 (the CDF evaluated at x)
     *   quantile(x) : the value of the digest at quantile x (x must be between 0 and 1)
     * </pre>
     *
     * @user-reference
     */
    public static final class Config extends TreeDataParameters<DataTDigest> {

        /**
         * Bundle field name from which to insert keys into the sketch.
         * This field is required.
         */
        @FieldConfig(codable = true, required = true)
        private String key;

        /**
         * Optionally specify the compression of the digest.
         * <p/>
         * How should accuracy be traded for size? A value of N here will give quantile errors
         * almost always less than 3/N, with considerably smaller errors expected for extreme
         * quantiles. Conversely, you should expect to track about 5 N centroids for this
         * accuracy. For example, the default of 100 targets quantile errors below roughly
         * 3/100 = 0.03 while tracking on the order of 500 centroids.
         */
        @FieldConfig(codable = true)
        private int compression = 100;

        @Override
        public DataTDigest newInstance() {
            DataTDigest db = new DataTDigest();
            db.filter = new TDigest(compression);
            return db;
        }
    }

    @FieldConfig(codable = true)
    private byte[] raw;

    private TDigest filter;
    private BundleField valueAccess;

    @Override
    public ValueObject getValue(String key) {
        // default to the 95th percentile when no recognized command is supplied
        double quantile = .95;
        TDigestValue.OP op = TDigestValue.OP.QUANTILE;
        if (key != null && key.startsWith("quantile(") && key.endsWith(")")) {
            op = TDigestValue.OP.QUANTILE;
            quantile = Double.valueOf(key.substring(9, key.length() - 1));
        } else if (key != null && key.startsWith("cdf(") && key.endsWith(")")) {
            op = TDigestValue.OP.CDF;
            quantile = Double.valueOf(key.substring(4, key.length() - 1));
        }
        return new TDigestValue(filter, op, quantile);
    }

    @Override
    public List<DataTreeNode> getNodes(DataTreeNode parent, String key) {
        String[] keys = LessStrings.splitArray(key, ",");
        List<DataTreeNode> list = new ArrayList<>(keys.length);
        for (String k : keys) {
            double quantile = filter.quantile(Double.valueOf(k));
            list.add(new VirtualTreeNode(k, (long) quantile));
        }
        return list;
    }

    /**
     * Updates the TDigest with the value from the key field. The value is expected
     * to be a number.
     */
    @Override
    public boolean updateChildData(DataTreeNodeUpdater state, DataTreeNode childNode, Config conf) {
        Bundle p = state.getBundle();
        if (valueAccess == null) {
            valueAccess = p.getFormat().getField(conf.key);
        }
        Numeric o = ValueUtil.asNumberOrParseDouble(p.getValue(valueAccess));
        if (o != null) {
            filter.add(o.asDouble().getDouble());
            return true;
        }
        return false;
    }

    @Override
    public void postDecode() {
        filter = TDigest.fromBytes(ByteBuffer.wrap(raw));
    }

    @Override
    public void preEncode() {
        int bound = filter.byteSize();
        if (bound > 0) {
            ByteBuffer buf = ByteBuffer.allocate(bound);
            filter.asSmallBytes(buf);
            raw = buf.array();
        }
    }

    public static final class TDigestValue extends AbstractCustom<TDigest> implements Numeric {

        enum OP {CDF, QUANTILE}

        private Double quantile;
        private OP op;

        /* required for codec */
        public TDigestValue() {
            super(null);
        }

        public TDigestValue(TDigest tdigest, OP op, Double quantile) {
            super(tdigest);
            this.quantile = quantile;
            this.op = op;
        }

        @Override
        public TYPE getObjectType() {
            return TYPE.CUSTOM;
        }

        @Override
        public ValueBytes asBytes() throws ValueTranslationException {
            throw new ValueTranslationException();
        }

        @Override
        public ValueArray asArray() throws ValueTranslationException {
            throw new ValueTranslationException();
        }

        @Override
        public ValueMap asMap() throws ValueTranslationException {
            ValueMap map = ValueFactory.createMap();
            int bound = heldObject.byteSize();
            ByteBuffer buf = ByteBuffer.allocate(bound);
            heldObject.asSmallBytes(buf);
            map.put("q", ValueFactory.create(quantile));
            map.put("o", ValueFactory.create(op.toString()));
            map.put("b", ValueFactory.create(buf.array()));
            return map;
        }

        @Override
        public void setValues(ValueMap valueMapEntries) {
            byte[] b = valueMapEntries.get("b").asBytes().asNative();
            this.quantile = valueMapEntries.get("q").asDouble().getDouble();
            this.op = OP.valueOf(valueMapEntries.get("o").asString().toString());
            heldObject = TDigest.fromBytes(ByteBuffer.wrap(b));
        }

        @Override
        public Numeric asNumeric() throws ValueTranslationException {
            return this;
        }
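
        /*
         * The numeric conversions below evaluate the held digest using the op and quantile
         * captured when this value was constructed. A hypothetical caller (illustrative only,
         * not part of this class) would see, for example:
         *
         *   TDigest digest = new TDigest(100);
         *   digest.add(1.0);
         *   digest.add(2.0);
         *   TDigestValue v = new TDigestValue(digest, OP.QUANTILE, 0.5);
         *   double median = v.asDouble().getDouble();   // evaluates digest.quantile(0.5)
         */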
        @Override
        public ValueLong asLong() {
            return asDouble().asLong();
        }

        @Override
        public ValueDouble asDouble() {
            switch (op) {
                case CDF:
                    return ValueFactory.create(heldObject.cdf(quantile));
                case QUANTILE:
                    return ValueFactory.create(heldObject.quantile(quantile));
                default:
                    return ValueFactory.create(heldObject.quantile(quantile));
            }
        }

        @Override
        public ValueString asString() throws ValueTranslationException {
            return asDouble().asString();
        }

        @Override
        public ValueSimple asSimple() {
            return asDouble();
        }

        @Override
        public Numeric sum(Numeric val) {
            if (TDigestValue.class == val.getClass()) {
                return new TDigestValue(TDigest.merge(heldObject.compression(),
                        Arrays.asList(this.heldObject, ((TDigestValue) val).heldObject)), op, quantile);
            }
            return asLong().sum(val.asLong());
        }

        private long toLong() {
            return asLong().getLong();
        }

        @Override
        public ValueDouble diff(Numeric val) {
            return sum(val).asDouble().diff(asDouble());
        }

        @Override
        public ValueDouble prod(Numeric val) {
            return sum(val).asDouble().prod(asDouble());
        }

        @Override
        public ValueDouble divide(Numeric val) {
            return sum(val).asDouble().divide(asDouble());
        }

        @Override
        public ValueDouble avg(int count) {
            return ValueFactory.create(asDouble().getDouble() / (double) count);
        }

        @Override
        public Numeric min(Numeric val) {
            if (val.asDouble().getDouble() < asDouble().getDouble()) {
                return val;
            } else {
                return this;
            }
        }

        @Override
        public Numeric max(Numeric val) {
            if (val.asDouble().getDouble() > asDouble().getDouble()) {
                return val;
            } else {
                return this;
            }
        }

        @Override
        public String toString() {
            return asString().toString();
        }
    }
}
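
// Illustrative query usage (a sketch following the $+{attachment}={command} format documented
// above, not additional syntax). With the job configuration example, the attachment is named
// "timeDigest", so a query path could request:
//   $+timeDigest=quantile(0.95)   -> the 95th percentile of the inserted TIME values
//   $+timeDigest=cdf(500)         -> the fraction of inserted TIME values <= 500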