/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.startree.hll;
import com.clearspring.analytics.stream.cardinality.CardinalityMergeException;
import com.clearspring.analytics.stream.cardinality.HyperLogLog;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableBiMap;
import com.linkedin.pinot.common.Utils;
import com.linkedin.pinot.core.data.GenericRow;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.List;
/**
 * Utility functions for manipulation of hll (HyperLogLog) fields.
 *
 * <p>Hll values are passed around as strings; {@link SerializationConverter} defines the
 * byte-to-char mapping used to build and parse those strings.
 */
public class HllUtil {
  /** Two-way lookup between the supported log2m values and the matching hll field size in bytes. */
  private static final ImmutableBiMap<Integer, Integer> LOG2M_TO_SIZE_IN_BYTES =
      ImmutableBiMap.of(5, 32, 6, 52, 7, 96, 8, 180, 9, 352);

  /**
   * To display a row with hll fields properly,
   * instead of directly invoking {@link GenericRow#toString()},
   * hll fields should be inspected and transformed.
   *
   * <p>Each field is rendered as {@code name : value} followed by {@code ", "} (so the result
   * ends with a trailing separator). A String value whose field name ends with the given suffix
   * is decoded as an hll and shown as its cardinality; an {@code Object[]} value is shown via
   * {@link Arrays#toString(Object[])}; anything else is appended as-is.
   *
   * @param row GenericRow
   * @param hllDeriveColumnSuffix column with this suffix will be treated as hll column
   * @return string representation of row
   */
  public static String inspectGenericRow(GenericRow row, String hllDeriveColumnSuffix) {
    StringBuilder b = new StringBuilder();
    for (String name : row.getFieldNames()) {
      b.append(name);
      b.append(" : ");
      Object value = row.getValue(name);
      if (value instanceof String && name.endsWith(hllDeriveColumnSuffix)) {
        // hll field: print the estimated cardinality rather than the raw serialized string
        b.append(convertStringToHll((String) value).cardinality());
      } else if (value instanceof Object[]) {
        b.append(Arrays.toString((Object[]) value));
      } else {
        b.append(value);
      }
      b.append(", ");
    }
    return b.toString();
  }

  /**
   * Looks up the hll field size for a log2m value.
   *
   * @param log2m log2m value; must be one of the keys of the internal lookup table (5 to 9)
   * @return hll field size in bytes registered for {@code log2m}
   * @throws IllegalArgumentException if {@code log2m} is not in the lookup table
   */
  public static int getHllFieldSizeFromLog2m(int log2m) {
    // Single lookup; the message is only formatted when the check fails.
    Integer sizeInBytes = LOG2M_TO_SIZE_IN_BYTES.get(log2m);
    Preconditions.checkArgument(sizeInBytes != null, "Log2m: %s is not in valid range.", log2m);
    return sizeInBytes;
  }

  /**
   * Reverse of {@link #getHllFieldSizeFromLog2m(int)}: looks up the log2m for an hll field size.
   *
   * @param hllFieldSize hll field size in bytes; must be one of the values of the lookup table
   * @return log2m registered for {@code hllFieldSize}
   * @throws IllegalArgumentException if {@code hllFieldSize} is not in the lookup table
   */
  public static int getLog2mFromHllFieldSize(int hllFieldSize) {
    Integer log2m = LOG2M_TO_SIZE_IN_BYTES.inverse().get(hllFieldSize);
    Preconditions.checkArgument(log2m != null, "HllFieldSize: %s is not in valid range.", hllFieldSize);
    return log2m;
  }

  /**
   * Serializes an hll into a string, mapping each serialized byte to one char
   * (see {@link SerializationConverter#byteArrayToChars(byte[])}).
   *
   * @param hll hll to serialize
   * @return string with one char per serialized byte
   * @throws RuntimeException wrapping the {@link IOException} if serialization fails
   */
  public static String convertHllToString(HyperLogLog hll) {
    try {
      return new String(SerializationConverter.byteArrayToChars(hll.getBytes()));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Rebuilds an hll from a string produced by {@link #convertHllToString(HyperLogLog)}.
   *
   * @param s string with one char per serialized byte
   * @return deserialized hll
   * @throws RuntimeException wrapping the {@link IOException} if the bytes cannot be parsed
   */
  public static HyperLogLog convertStringToHll(String s) {
    return buildHllFromBytes(SerializationConverter.charsToByteArray(s.toCharArray()));
  }

  /**
   * Generate a hll from a single value, and convert it to string type.
   * It is used for default derived field value.
   *
   * @param log2m log2m used to construct the new hll
   * @param value the single value offered to the hll
   * @return string form of the hll, as produced by {@link #convertHllToString(HyperLogLog)}
   */
  public static String singleValueHllAsString(int log2m, Object value) {
    HyperLogLog hll = new HyperLogLog(log2m);
    hll.offer(value);
    return convertHllToString(hll);
  }

  /**
   * Deserializes an hll from its byte representation.
   *
   * @param bytes serialized hll
   * @return deserialized hll
   * @throws RuntimeException wrapping the {@link IOException} if the bytes cannot be parsed
   */
  public static HyperLogLog buildHllFromBytes(byte[] bytes) {
    try {
      return HyperLogLog.Builder.build(bytes);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Copies an hll by creating a fresh hll with the given log2m and merging the source into it.
   * The source hll is only read via {@code addAll}.
   *
   * @param hll hll to copy
   * @param log2m log2m of the new hll
   * @return new hll containing the merged content of {@code hll}
   * @throws RuntimeException wrapping the {@link CardinalityMergeException} if the merge is rejected
   */
  public static HyperLogLog clone(HyperLogLog hll, int log2m) {
    try {
      HyperLogLog ret = new HyperLogLog(log2m);
      ret.addAll(hll);
      return ret;
    } catch (CardinalityMergeException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Merge all HLLs in list to the first HLL in the list, the list must contain at least one element.
   * The first element is modified in place and returned; the list itself is not changed.
   *
   * @param resultList hlls to merge; must contain at least one element
   * @return the first element of {@code resultList}, after merging all other elements into it
   * @throws IllegalArgumentException if {@code resultList} is empty
   */
  public static HyperLogLog mergeHLLResultsToFirstInList(List<HyperLogLog> resultList) {
    // Enforce the documented precondition with a clear message instead of an index error.
    Preconditions.checkArgument(!resultList.isEmpty(), "resultList must contain at least one element.");
    HyperLogLog hllResult = resultList.get(0);
    // Iterate over a view of the remaining elements instead of calling get(i) repeatedly.
    for (HyperLogLog other : resultList.subList(1, resultList.size())) {
      try {
        hllResult.addAll(other);
      } catch (CardinalityMergeException e) {
        // NOTE(review): presumably Utils.rethrowException always throws; if it ever returned,
        // the loop would carry on with the next element. Confirm against Utils.
        Utils.rethrowException(e);
      }
    }
    return hllResult;
  }

  /**
   * Convert between byte array and char array, one byte is mapped to one char and vice versa.
   * This is due to UTF-8 encoding for String type serialization used all over the system.
   */
  public static class SerializationConverter {
    // we choose 129 since normally we leave \0 for padding:
    // bytes -128..127 are shifted to chars 1..256, so char 0 is never produced.
    private static final int BYTE_TO_CHAR_OFFSET = 129;

    /**
     * Maps every byte to a char via {@link #byteToChar(byte)}.
     *
     * @param byteArray bytes to convert
     * @return new char array of the same length
     */
    public static char[] byteArrayToChars(byte[] byteArray) {
      char[] charArrayBuffer = new char[byteArray.length];
      for (int i = 0; i < byteArray.length; i++) {
        charArrayBuffer[i] = byteToChar(byteArray[i]);
      }
      return charArrayBuffer;
    }

    /**
     * Maps every char to a byte via {@link #charToByte(char)}.
     *
     * @param chars chars to convert
     * @return new byte array of the same length
     */
    public static byte[] charsToByteArray(char[] chars) {
      byte[] ret = new byte[chars.length];
      for (int i = 0; i < ret.length; i++) {
        ret[i] = charToByte(chars[i]);
      }
      return ret;
    }

    /**
     * Shifts a byte by the fixed offset to obtain a char.
     *
     * @param b byte to convert
     * @return char with code {@code b + 129}
     */
    public static char byteToChar(byte b) {
      return (char) (((int) b) + BYTE_TO_CHAR_OFFSET);
    }

    /**
     * Inverse of {@link #byteToChar(byte)}. Chars that were not produced by
     * {@link #byteToChar(byte)} are narrowed to a byte after subtracting the offset.
     *
     * @param c char to convert
     * @return byte with value {@code c - 129}, narrowed to a byte
     */
    public static byte charToByte(char c) {
      return (byte) (((int) c) - BYTE_TO_CHAR_OFFSET);
    }
  }
}