/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.util.hll; /** * A corresponding deserializer for {@link BigEndianAscendingWordSerializer}. */ class BigEndianAscendingWordDeserializer implements IWordDeserializer { // The number of bits per byte. private static final int BITS_PER_BYTE = 8; // long mask for the maximum value stored in a byte private static final long BYTE_MASK = (1L << BITS_PER_BYTE) - 1L; // ************************************************************************ // The length in bits of the words to be read. private final int wordLength; // The byte array to which the words are serialized. private final byte[] bytes; // The number of leading padding bytes in 'bytes' to be ignored. private final int bytePadding; // The number of words that the byte array contains. private final int wordCount; // The current read state. private int currentWordIndex; // ======================================================================== /** * @param wordLength the length in bits of the words to be deserialized. Must * be less than or equal to 64 and greater than or equal to 1. * @param bytePadding the number of leading bytes that pad the serialized words. * Must be greater than or equal to zero. * @param bytes the byte array containing the serialized words. Cannot be * <code>null</code>. */ public BigEndianAscendingWordDeserializer(final int wordLength, final int bytePadding, final byte[] bytes) { if((wordLength < 1) || (wordLength > 64)) { throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")"); } if(bytePadding < 0) { throw new IllegalArgumentException("Byte padding must be >= zero. (was: " + bytePadding + ")"); } this.wordLength = wordLength; this.bytes = bytes; this.bytePadding = bytePadding; final int dataBytes = (bytes.length - bytePadding); final long dataBits = (dataBytes * BITS_PER_BYTE); this.wordCount = (int)(dataBits/wordLength); currentWordIndex = 0; } // ======================================================================== /* (non-Javadoc) * @see net.agkn.hll.serialization.IWordDeserializer#readWord() */ @Override public long readWord() { final long word = readWord(currentWordIndex); currentWordIndex++; return word; } // ------------------------------------------------------------------------ /** * Reads the word at the specified sequence position (zero-indexed). * * @param position the zero-indexed position of the word to be read. This * must be greater than or equal to zero. * @return the value of the serialized word at the specified position. */ private long readWord(final int position) { if(position < 0) { throw new ArrayIndexOutOfBoundsException(position); } // First bit of the word final long firstBitIndex = ((long)position) * ((long)wordLength); final int firstByteIndex = (bytePadding + (int)(firstBitIndex / BITS_PER_BYTE)); final int firstByteSkipBits = (int)(firstBitIndex % BITS_PER_BYTE); // Last bit of the word final long lastBitIndex = (firstBitIndex + wordLength - 1); final int lastByteIndex = (bytePadding + (int)(lastBitIndex / BITS_PER_BYTE)); final int lastByteBitsToConsume; final int bitsAfterByteBoundary = (int)((lastBitIndex + 1) % BITS_PER_BYTE); // If the word terminates at the end of the last byte, consume the whole // last byte. if(bitsAfterByteBoundary == 0) { lastByteBitsToConsume = BITS_PER_BYTE; } else { // Otherwise, only consume what is necessary. lastByteBitsToConsume = bitsAfterByteBoundary; } if(lastByteIndex >= bytes.length) { throw new ArrayIndexOutOfBoundsException("Word out of bounds of backing array."); } // Accumulator long value = 0; // -------------------------------------------------------------------- // First byte final int bitsRemainingInFirstByte = (BITS_PER_BYTE - firstByteSkipBits); final int bitsToConsumeInFirstByte = Math.min(bitsRemainingInFirstByte, wordLength); long firstByte = (long)bytes[firstByteIndex]; // Mask off the bits to skip in the first byte. final long firstByteMask = ((1L << bitsRemainingInFirstByte) - 1L); firstByte &= firstByteMask; // Right-align relevant bits of first byte. firstByte >>>= (bitsRemainingInFirstByte - bitsToConsumeInFirstByte); value |= firstByte; // If the first byte contains the whole word, short-circuit. if(firstByteIndex == lastByteIndex) { return value; } // -------------------------------------------------------------------- // Middle bytes final int middleByteCount = (lastByteIndex - firstByteIndex - 1); for(int i=0; i<middleByteCount; i++) { final long middleByte = (bytes[firstByteIndex + i + 1] & BYTE_MASK); // Push middle byte onto accumulator. value <<= BITS_PER_BYTE; value |= middleByte; } // -------------------------------------------------------------------- // Last byte long lastByte = (bytes[lastByteIndex] & BYTE_MASK); lastByte >>= (BITS_PER_BYTE - lastByteBitsToConsume); value <<= lastByteBitsToConsume; value |= lastByte; return value; } /* (non-Javadoc) * @see net.agkn.hll.serialization.IWordDeserializer#totalWordCount() */ @Override public int totalWordCount() { return wordCount; } }