/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.data.tree.prop; import com.addthis.basis.util.LessBytes; import com.addthis.basis.util.LessStrings; import com.addthis.bundle.core.Bundle; import com.addthis.bundle.core.BundleField; import com.addthis.bundle.util.ValueUtil; import com.addthis.bundle.value.AbstractCustom; import com.addthis.bundle.value.Numeric; import com.addthis.bundle.value.ValueArray; import com.addthis.bundle.value.ValueBytes; import com.addthis.bundle.value.ValueCustom; import com.addthis.bundle.value.ValueDouble; import com.addthis.bundle.value.ValueFactory; import com.addthis.bundle.value.ValueLong; import com.addthis.bundle.value.ValueMap; import com.addthis.bundle.value.ValueObject; import com.addthis.bundle.value.ValueSimple; import com.addthis.bundle.value.ValueString; import com.addthis.bundle.value.ValueTranslationException; import com.addthis.codec.annotations.FieldConfig; import com.addthis.codec.binary.CodecBin2; import com.addthis.codec.codables.SuperCodable; import com.addthis.codec.json.CodecJSON; import com.addthis.hydra.data.tree.DataTreeNode; import com.addthis.hydra.data.tree.DataTreeNodeUpdater; import com.addthis.hydra.data.tree.TreeDataParameters; import com.addthis.hydra.data.tree.TreeNodeData; import com.addthis.hydra.store.util.Raw; import com.addthis.hydra.store.util.SeenFilterBasic; /** * like DataBloom but better integrated to into query. over time we need to * resolve this. */ public class DataSeen extends TreeNodeData<DataSeen.Config> implements SuperCodable { /** * <p>This data attachment is a <span class="hydra-summary">bloom filter attached to a node</span>. * <p/> * <p><b>Query Path Directives</b> * <p/> * <p>${attachment}={command} where command is one of the following: * <ul> * <li>sat : saturation of the bloom filter</li> * <li>bits : total number of bits allocated to the bloom filter</li> * <li>ck-[string] : returns 1 or 0 based on testing against bloom filter</li> * <li>st-[string] : returns 1 or 0 based on testing. Then insert string into filter.</li> * </ul> * <p/> * <p>Calling ${attachment} without any commands will return the bloom filter * as a value object. This will allow you to merge two bloom filters by * summing two bloom filter objects together.</p> * <p/> * <p>The % notation is not supported for this data attachment.</p> * * @user-reference */ public static final class Config extends TreeDataParameters<DataSeen> { /** * Bundle field name from which to draw values. * This field is required. */ @FieldConfig(codable = true) private String key; /** * Maximum number of elements that can be stored in the bloom filter. * This field is required. */ @FieldConfig(codable = true) private int max; /** * Number of hash function evaluations for each insertion * operation. This parameter is usually referred to as * the "k" parameter in the literature. Default value is 4. */ @FieldConfig(codable = true) private int bitsPer = 4; /** * Type of hash function that is used. The following types are available. * <p>0 - HASH_HASHCODE : mostly bad * <p>1 - HASH_HASHCODE_SHIFT_REV : mostly bad * <p>2 - HASH_HASHCODE_LONG_REV : mostly bad * <p>3 - HASH_MD5 : marginally better accuracy, much slower * <p>4 - HASH_PLUGGABLE_SHIFT : best blend of speed and accuracy * <p>Default value is 4. */ @FieldConfig(codable = true) private int hash = 4; @Override public DataSeen newInstance() { DataSeen db = new DataSeen(); db.bloom = new SeenFilterBasic<>(max * bitsPer, bitsPer, hash); return db; } } @FieldConfig(codable = true) private SeenFilterBasic<Raw> bloom; private BundleField keyAccess; @Override public ValueObject getValue(String key) { if (LessStrings.isEmpty(key)) { return new ValueBloom(bloom); } else if (key.equals("sat")) { return ValueFactory.create(bloom.getSaturation()); } else if (key.equals("bits")) { return ValueFactory.create(bloom.getBits()); } else if (key.startsWith("ck-")) { return ValueFactory.create(bloom.getSeen(Raw.get(LessBytes.urldecode(key.substring(3)))) ? 1 : 0); } else if (key.startsWith("st-")) { long[] set = bloom.getHashSet(Raw.get(LessBytes.urldecode(key.substring(3)))); boolean seen = bloom.checkHashSet(set); bloom.updateHashSet(set); return ValueFactory.create(seen ? 1 : 0); } else { return null; } } @Override public boolean updateChildData(DataTreeNodeUpdater state, DataTreeNode childNode, Config conf) { Bundle p = state.getBundle(); if (keyAccess == null) { keyAccess = p.getFormat().getField(conf.key); } ValueObject o = p.getValue(keyAccess); if (o != null) { bloom.setSeen(Raw.get(ValueUtil.asNativeString(o))); } return true; } @Override public void postDecode() { } @Override public void preEncode() { } /** * for working with bloom filters */ public static final class ValueBloom extends AbstractCustom<SeenFilterBasic<?>> implements Numeric { public ValueBloom() { super(null); } public ValueBloom(SeenFilterBasic<?> bloom) { super(bloom); } @Override public String toString() { try { return CodecJSON.encodeString(heldObject); } catch (Exception e) { return super.toString(); } } public long toLong() { return heldObject.getSaturation(); } @Override public Numeric avg(int count) { return this; } @Override public Numeric diff(Numeric val) { return this; } @Override public Numeric prod(Numeric val) { return this; } @Override public Numeric divide(Numeric val) { return this; } @Override public Numeric max(Numeric val) { if (val.getClass() == getClass()) { ValueBloom b = (ValueBloom) val; return b.toLong() > toLong() ? b : this; } return this; } @Override public Numeric min(Numeric val) { if (val.getClass() == getClass()) { ValueBloom b = (ValueBloom) val; return b.toLong() < toLong() ? b : this; } return this; } @Override public Numeric sum(Numeric val) { if (val.getClass() == getClass()) { ValueBloom b = (ValueBloom) val; return new ValueBloom(b.heldObject.mergeSeen(heldObject)); } return this; } @Override public TYPE getObjectType() { return TYPE.CUSTOM; } @Override public ValueBytes asBytes() throws ValueTranslationException { throw new ValueTranslationException(); } @Override public ValueArray asArray() throws ValueTranslationException { throw new ValueTranslationException(); } @Override public Numeric asNumeric() throws ValueTranslationException { return this; } @Override public ValueLong asLong() throws ValueTranslationException { return ValueFactory.create(toLong()); } @Override public ValueDouble asDouble() throws ValueTranslationException { return ValueFactory.create((double) toLong()); } @Override public ValueString asString() throws ValueTranslationException { throw new ValueTranslationException(); } @Override public ValueCustom asCustom() throws ValueTranslationException { return this; } @Override public ValueMap asMap() throws ValueTranslationException { try { ValueMap map = ValueFactory.createMap(); map.put("b", ValueFactory.create(CodecBin2.encodeBytes(heldObject))); return map; } catch (Exception ex) { throw new ValueTranslationException(ex); } } @Override public void setValues(ValueMap map) { try { heldObject = (SeenFilterBasic<?>) CodecBin2.decodeBytes( new SeenFilterBasic(), map.get("b").asBytes().asNative()); } catch (Exception ex) { throw new ValueTranslationException(ex); } } @Override public ValueSimple asSimple() { return ValueFactory.create(heldObject.getSaturation()); } } }