/*
 * Copyright (C) 2011 Clearspring Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.streaminer.stream.cardinality;

import org.streaminer.util.hash.MurmurHash;
import org.streaminer.util.IBuilder;

import java.util.Arrays;

/**
 * Cardinality estimator that keeps {@code m = 2^k} byte registers.
 *
 * <p>Each offered 32-bit hash selects a register with its top {@code k} bits and
 * raises that register to the rank ({@link #rho(int, int)}) of the remaining bits.
 * The estimate is {@code Ca * 2^(average register value)}.
 */
public class LogLog implements IRichCardinality {
    /**
     * Gamma function computed using SciLab
     * ((gamma(-(m.^(-1))).* ( (1-2.^(m.^(-1)))./log(2) )).^(-m)).*m
     *
     * <p>Indexed by {@code k}; the constructors only ever read indices
     * {@code 1..MAX_K}.
     */
    protected static final double[] mAlpha = {
            0,
            0.44567926005415,
            1.2480639342271,
            2.8391255240079,
            6.0165231584811,
            12.369319965552,
            25.073991603109,
            50.482891762521,
            101.30047482549,
            202.93553337953,
            406.20559693552,
            812.74569741657,
            1625.8258887309,
            3251.9862249084,
            6504.3071471860,
            13008.949929672,
            26018.222470181,
            52036.684135280,
            104073.41696276,
            208139.24771523,
            416265.57100022,
            832478.53851627,
            1669443.2499579,
            3356902.8702907,
            6863377.8429508,
            11978069.823687,
            31333767.455026,
            52114301.457757,
            72080129.928986,
            68945006.880409,
            31538957.552704,
            3299942.4347441
    };

    /**
     * Largest accepted {@code k}. The last table index is excluded because
     * {@code 1 << 31} is negative and cannot be used as an array size.
     */
    private static final int MAX_K = mAlpha.length - 2;

    /** Number of hash bits used to select a register. */
    protected final int k;
    /** Number of registers, always {@code 1 << k}. */
    protected int m;
    /** Correction constant {@code mAlpha[k]} applied by {@link #cardinality()}. */
    protected double Ca;
    /** The registers; each holds the largest rank seen for its bucket. */
    protected byte[] M;
    /** Running sum of all register values, kept in step with {@link #M}. */
    protected int Rsum = 0;

    /**
     * Creates an empty estimator with {@code 2^k} registers.
     *
     * @param k number of hash bits used as the register index
     * @throws IllegalArgumentException if {@code k} is smaller than 1 or larger
     *                                  than the maximum supported value
     */
    public LogLog(int k) {
        // k == 0 would make "hash >>> (32 - k)" a no-op shift (Java masks the
        // shift distance to 5 bits) and negative k cannot index mAlpha at all.
        if (k < 1) {
            throw new IllegalArgumentException(String.format("k must be at least 1: k=%d", k));
        }
        if (k > MAX_K) {
            throw new IllegalArgumentException(String.format("Max k (%d) exceeded: k=%d", MAX_K, k));
        }

        this.k = k;
        this.m = 1 << k;
        this.Ca = mAlpha[k];
        this.M = new byte[m];
    }

    /**
     * Creates an estimator backed by an existing register array.
     *
     * <p>The array is used directly, not copied, so later changes to it by the
     * caller are visible to this estimator (and are not reflected in
     * {@link #Rsum}).
     *
     * @param M register values; the length must be a power of two and at least 2
     * @throws NullPointerException     if {@code M} is null
     * @throws IllegalArgumentException if {@code M.length} is not a power of two
     *                                  or is smaller than 2
     */
    public LogLog(byte[] M) {
        if (M == null) {
            throw new NullPointerException("M must not be null");
        }

        int size = M.length;
        int bits = Integer.numberOfTrailingZeros(size);
        // Checked unconditionally: an assert would be skipped when assertions
        // are disabled and let a malformed array through.
        if (size < 2 || size != (1 << bits)) {
            throw new IllegalArgumentException(String.format(
                    "Invalid array size: M.length must be a power of 2 (>= 2): length=%d", size));
        }

        this.M = M;
        this.m = size;
        this.k = bits;
        this.Ca = mAlpha[k];
        for (byte b : M) {
            Rsum += b;
        }
    }

    /**
     * @return the internal register array itself (not a copy)
     */
    @Override
    public byte[] getBytes() {
        return M;
    }

    /**
     * @return the number of registers
     */
    @Override
    public int sizeof() {
        return m;
    }

    /**
     * @return the current estimate, {@code Ca * 2^(Rsum / m)}, truncated to a long
     */
    @Override
    public long cardinality() {
        double Ravg = Rsum / (double) m;
        return (long) (Ca * Math.pow(2, Ravg));
    }

    /**
     * Not supported by this estimator.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean offerHashed(long hashedLong) {
        throw new UnsupportedOperationException();
    }

    /**
     * Offers an already-hashed 32-bit value.
     *
     * @param hashedInt the hash; its top {@code k} bits pick the register and the
     *                  remaining bits determine the rank
     * @return true if a register was raised, false if the estimator is unchanged
     */
    @Override
    public boolean offerHashed(int hashedInt) {
        int j = hashedInt >>> (Integer.SIZE - k);
        byte r = (byte) rho(hashedInt, k);
        if (M[j] < r) {
            // Keep the running sum consistent with the register being raised.
            Rsum += r - M[j];
            M[j] = r;
            return true;
        }
        return false;
    }

    /**
     * Hashes {@code o} with {@link MurmurHash} and offers the resulting int.
     *
     * @return true if a register was raised, false if the estimator is unchanged
     */
    @Override
    public boolean offer(Object o) {
        int x = MurmurHash.getInstance().hash(o);
        return offerHashed(x);
    }

    /**
     * Computes the 1-based position, counted from the most significant side, of
     * the first set bit among the low {@code Integer.SIZE-k} bits of {@code x}.
     *
     * <p>A sentinel bit is OR-ed in just below those bits so the result is
     * bounded when they are all zero.
     *
     * @param x the value to inspect
     * @param k number of leading bits of {@code x} to ignore
     * @return the position described above, or {@code Integer.SIZE-k+1} if the low
     *         {@code Integer.SIZE-k} bits are all zero (for {@code 1 <= k < Integer.SIZE})
     */
    protected static int rho(int x, int k) {
        return Integer.numberOfLeadingZeros((x << k) | (1 << (k - 1))) + 1;
    }

    /**
     * Merges this estimator with the given ones into a new estimator; neither
     * this estimator nor the arguments are modified.
     *
     * @param estimators estimators to merge with this one; may be null or empty
     * @return a new estimator whose registers are the element-wise maximum of all
     *         inputs; an independent copy of this estimator if {@code estimators}
     *         is null or empty
     * @throws LogLogMergeException if estimators are not mergeable (all estimators must be instances of LogLog of the same size)
     */
    @Override
    public IRichCardinality merge(IRichCardinality... estimators) throws LogLogMergeException {
        // Always work on a copy so the result never shares registers with this
        // instance, including when estimators is null.
        byte[] mergedBytes = Arrays.copyOf(this.M, this.M.length);

        if (estimators != null) {
            for (IRichCardinality estimator : estimators) {
                if (!(this.getClass().isInstance(estimator))) {
                    throw new LogLogMergeException("Cannot merge estimators of different class");
                }
                if (estimator.sizeof() != this.sizeof()) {
                    throw new LogLogMergeException("Cannot merge estimators of different sizes");
                }
                LogLog ll = (LogLog) estimator;
                for (int i = 0; i < mergedBytes.length; ++i) {
                    mergedBytes[i] = (byte) Math.max(mergedBytes[i], ll.M[i]);
                }
            }
        }

        return new LogLog(mergedBytes);
    }

    /**
     * Merges estimators to produce an estimator for their combined streams
     *
     * @param estimators the estimators to merge
     * @return merged estimator or null if no estimators were provided
     * @throws LogLogMergeException if estimators are not mergeable (all estimators must be the same size)
     */
    public static LogLog mergeEstimators(LogLog... estimators) throws LogLogMergeException {
        if (estimators == null || estimators.length == 0) {
            return null;
        }
        return (LogLog) estimators[0].merge(Arrays.copyOfRange(estimators, 1, estimators.length));
    }

    /** Thrown by {@link #merge(IRichCardinality...)} when the inputs cannot be merged. */
    @SuppressWarnings("serial")
    protected static class LogLogMergeException extends CardinalityMergeException {
        public LogLogMergeException(String message) {
            super(message);
        }
    }

    /** Builder that creates {@link LogLog} estimators with a fixed {@code k}. */
    public static class Builder implements IBuilder<IRichCardinality> {
        protected final int k;

        /** Creates a builder with {@code k = 16}. */
        public Builder() {
            this(16);
        }

        /**
         * @param k value passed to {@link LogLog#LogLog(int)} by {@link #build()};
         *          it is not validated here
         */
        public Builder(int k) {
            this.k = k;
        }

        /**
         * @return a new, empty estimator created with this builder's {@code k}
         */
        @Override
        public LogLog build() {
            return new LogLog(k);
        }

        /**
         * @return {@code 1 << k}
         */
        @Override
        public int sizeof() {
            return 1 << k;
        }
    }
}