/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.util.hll; /** * A collection of constants and utilities for serializing and deserializing * HLLs. * * NOTE: 'package' visibility is used for many methods that only need to be * used by the {@link ISchemaVersion} implementations. The structure of * a serialized HLL's metadata should be opaque to the rest of the * library. */ class SerializationUtil { /** * The number of bits (of the parameters byte) dedicated to encoding the * width of the registers. */ /*package*/ static int REGISTER_WIDTH_BITS = 3; /** * A mask to cap the maximum value of the register width. */ /*package*/ static int REGISTER_WIDTH_MASK = (1 << REGISTER_WIDTH_BITS) - 1; /** * The number of bits (of the parameters byte) dedicated to encoding * <code>log2(registerCount)</code>. */ /*package*/ static int LOG2_REGISTER_COUNT_BITS = 5; /** * A mask to cap the maximum value of <code>log2(registerCount)</code>. */ /*package*/ static int LOG2_REGISTER_COUNT_MASK = (1 << LOG2_REGISTER_COUNT_BITS) - 1; /** * The number of bits (of the cutoff byte) dedicated to encoding the * log-base-2 of the explicit cutoff or sentinel values for * 'explicit-disabled' or 'auto'. */ /*package*/ static int EXPLICIT_CUTOFF_BITS = 6; /** * A mask to cap the maximum value of the explicit cutoff choice. */ /*package*/ static int EXPLICIT_CUTOFF_MASK = (1 << EXPLICIT_CUTOFF_BITS) - 1; /** * Number of bits in a nibble. */ private static int NIBBLE_BITS = 4; /** * A mask to cap the maximum value of a nibble. */ private static int NIBBLE_MASK = (1 << NIBBLE_BITS) - 1; // ************************************************************************ // Serialization utilities /** * Schema version one (v1). */ public static ISchemaVersion VERSION_ONE = new SchemaVersionOne(); /** * The default schema version for serializing HLLs. */ public static ISchemaVersion DEFAULT_SCHEMA_VERSION = VERSION_ONE; /** * List of registered schema versions, indexed by their version numbers. If * an entry is <code>null</code>, then no such schema version is registered. * Similarly, registering a new schema version simply entails assigning an * {@link ISchemaVersion} instance to the appropriate index of this array.<p/> * * By default, only {@link SchemaVersionOne} is registered. Note that version * zero will always be reserved for internal (e.g. proprietary, legacy) schema * specifications/implementations and will never be assigned to in by this * library. */ public static ISchemaVersion[] REGISTERED_SCHEMA_VERSIONS = new ISchemaVersion[16]; static { REGISTERED_SCHEMA_VERSIONS[1] = VERSION_ONE; } /** * @param schemaVersionNumber the version number of the {@link ISchemaVersion} * desired. This must be a registered schema version number. * @return The {@link ISchemaVersion} for the given number. This will never * be <code>null</code>. */ public static ISchemaVersion getSchemaVersion(final int schemaVersionNumber) { if(schemaVersionNumber >= REGISTERED_SCHEMA_VERSIONS.length || schemaVersionNumber < 0) { throw new RuntimeException("Invalid schema version number " + schemaVersionNumber); } final ISchemaVersion schemaVersion = REGISTERED_SCHEMA_VERSIONS[schemaVersionNumber]; if(schemaVersion == null) { throw new RuntimeException("Unknown schema version number " + schemaVersionNumber); } return schemaVersion; } /** * Get the appropriate {@link ISchemaVersion schema version} for the specified * serialized HLL. * * @param bytes the serialized HLL whose schema version is desired. * @return the schema version for the specified HLL. This will never * be <code>null</code>. */ public static ISchemaVersion getSchemaVersion(final byte[] bytes) { final byte versionByte = bytes[0]; final int schemaVersionNumber = schemaVersion(versionByte); return getSchemaVersion(schemaVersionNumber); } // ************************************************************************ // Package-specific shared helpers /** * Generates a byte that encodes the schema version and the type ordinal * of the HLL. * * The top nibble is the schema version and the bottom nibble is the type * ordinal. * * @param schemaVersion the schema version to encode. * @param typeOrdinal the type ordinal of the HLL to encode. * @return the packed version byte */ public static byte packVersionByte(final int schemaVersion, final int typeOrdinal) { return (byte)(((NIBBLE_MASK & schemaVersion) << NIBBLE_BITS) | (NIBBLE_MASK & typeOrdinal)); } /** * Generates a byte that encodes the log-base-2 of the explicit cutoff * or sentinel values for 'explicit-disabled' or 'auto', as well as the * boolean indicating whether to use {@link HLLType#SPARSE} * in the promotion hierarchy. * * The top bit is always padding, the second highest bit indicates the * 'sparse-enabled' boolean, and the lowest six bits encode the explicit * cutoff value. * * @param explicitCutoff the explicit cutoff value to encode. * <ul> * <li> * If 'explicit-disabled' is chosen, this value should be <code>0</code>. * </li> * <li> * If 'auto' is chosen, this value should be <code>63</code>. * </li> * <li> * If a cutoff of 2<sup>n</sup> is desired, for <code>0 <= n < 31</code>, * this value should be <code>n + 1</code>. * </li> * </ul> * @param sparseEnabled whether {@link HLLType#SPARSE} * should be used in the promotion hierarchy to improve HLL * storage. * * @return the packed cutoff byte */ public static byte packCutoffByte(final int explicitCutoff, final boolean sparseEnabled) { final int sparseBit = (sparseEnabled ? (1 << EXPLICIT_CUTOFF_BITS) : 0); return (byte)(sparseBit | (EXPLICIT_CUTOFF_MASK & explicitCutoff)); } /** * Generates a byte that encodes the parameters of a * {@link HLLType#FULL} or {@link HLLType#SPARSE} * HLL.<p/> * * The top 3 bits are used to encode <code>registerWidth - 1</code> * (range of <code>registerWidth</code> is thus 1-9) and the bottom 5 * bits are used to encode <code>registerCountLog2</code> * (range of <code>registerCountLog2</code> is thus 0-31). * * @param registerWidth the register width (must be at least 1 and at * most 9) * @param registerCountLog2 the log-base-2 of the register count (must * be at least 0 and at most 31) * @return the packed parameters byte */ public static byte packParametersByte(final int registerWidth, final int registerCountLog2) { final int widthBits = ((registerWidth - 1) & REGISTER_WIDTH_MASK); final int countBits = (registerCountLog2 & LOG2_REGISTER_COUNT_MASK); return (byte)((widthBits << LOG2_REGISTER_COUNT_BITS) | countBits); } /** * Extracts the 'sparse-enabled' boolean from the cutoff byte of a serialized * HLL. * * @param cutoffByte the cutoff byte of the serialized HLL * @return the 'sparse-enabled' boolean */ public static boolean sparseEnabled(final byte cutoffByte) { return ((cutoffByte >>> EXPLICIT_CUTOFF_BITS) & 1) == 1; } /** * Extracts the explicit cutoff value from the cutoff byte of a serialized * HLL. * * @param cutoffByte the cutoff byte of the serialized HLL * @return the explicit cutoff value */ public static int explicitCutoff(final byte cutoffByte) { return (cutoffByte & EXPLICIT_CUTOFF_MASK); } /** * Extracts the schema version from the version byte of a serialized * HLL. * * @param versionByte the version byte of the serialized HLL * @return the schema version of the serialized HLL */ public static int schemaVersion(final byte versionByte) { return NIBBLE_MASK & (versionByte >>> NIBBLE_BITS); } /** * Extracts the type ordinal from the version byte of a serialized HLL. * * @param versionByte the version byte of the serialized HLL * @return the type ordinal of the serialized HLL */ public static int typeOrdinal(final byte versionByte) { return (versionByte & NIBBLE_MASK); } /** * Extracts the register width from the parameters byte of a serialized * {@link HLLType#FULL} HLL. * * @param parametersByte the parameters byte of the serialized HLL * @return the register width of the serialized HLL * * @see #packParametersByte(int, int) */ public static int registerWidth(final byte parametersByte) { return ((parametersByte >>> LOG2_REGISTER_COUNT_BITS) & REGISTER_WIDTH_MASK) + 1; } /** * Extracts the log2(registerCount) from the parameters byte of a * serialized {@link HLLType#FULL} HLL. * * @param parametersByte the parameters byte of the serialized HLL * @return log2(registerCount) of the serialized HLL * * @see #packParametersByte(int, int) */ public static int registerCountLog2(final byte parametersByte) { return (parametersByte & LOG2_REGISTER_COUNT_MASK); } }