/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.util; import java.util.Arrays; import java.util.List; import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF; /** * Class that Posting and PostingVector use to write byte * streams into shared fixed-size byte[] arrays. The idea * is to allocate slices of increasing lengths For * example, the first slice is 5 bytes, the next slice is * 14, etc. We start by writing our bytes into the first * 5 bytes. When we hit the end of the slice, we allocate * the next slice and then write the address of the new * slice into the last 4 bytes of the previous slice (the * "forwarding address"). * * Each slice is filled with 0's initially, and we mark * the end with a non-zero byte. This way the methods * that are writing into the slice don't need to record * its length and instead allocate a new slice once they * hit a non-zero byte. * * @lucene.internal **/ public final class ByteBlockPool { public final static int BYTE_BLOCK_SHIFT = 15; public final static int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT; public final static int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1; /** Abstract class for allocating and freeing byte * blocks. */ public abstract static class Allocator { protected final int blockSize; public Allocator(int blockSize) { this.blockSize = blockSize; } public abstract void recycleByteBlocks(byte[][] blocks, int start, int end); public void recycleByteBlocks(List<byte[]> blocks) { final byte[][] b = blocks.toArray(new byte[blocks.size()][]); recycleByteBlocks(b, 0, b.length); } public byte[] getByteBlock() { return new byte[blockSize]; } } /** A simple {@link Allocator} that never recycles. */ public static final class DirectAllocator extends Allocator { public DirectAllocator() { this(BYTE_BLOCK_SIZE); } public DirectAllocator(int blockSize) { super(blockSize); } @Override public void recycleByteBlocks(byte[][] blocks, int start, int end) { } } /** A simple {@link Allocator} that never recycles, but * tracks how much total RAM is in use. */ public static class DirectTrackingAllocator extends Allocator { private final Counter bytesUsed; public DirectTrackingAllocator(Counter bytesUsed) { this(BYTE_BLOCK_SIZE, bytesUsed); } public DirectTrackingAllocator(int blockSize, Counter bytesUsed) { super(blockSize); this.bytesUsed = bytesUsed; } @Override public byte[] getByteBlock() { bytesUsed.addAndGet(blockSize); return new byte[blockSize]; } @Override public void recycleByteBlocks(byte[][] blocks, int start, int end) { bytesUsed.addAndGet(-((end-start)* blockSize)); for (int i = start; i < end; i++) { blocks[i] = null; } } }; /** * array of buffers currently used in the pool. Buffers are allocated if * needed don't modify this outside of this class. */ public byte[][] buffers = new byte[10][]; /** index into the buffers array pointing to the current buffer used as the head */ private int bufferUpto = -1; // Which buffer we are upto /** Where we are in head buffer */ public int byteUpto = BYTE_BLOCK_SIZE; /** Current head buffer */ public byte[] buffer; /** Current head offset */ public int byteOffset = -BYTE_BLOCK_SIZE; private final Allocator allocator; public ByteBlockPool(Allocator allocator) { this.allocator = allocator; } /** * Resets the pool to its initial state reusing the first buffer and fills all * buffers with <tt>0</tt> bytes before they reused or passed to * {@link Allocator#recycleByteBlocks(byte[][], int, int)}. Calling * {@link ByteBlockPool#nextBuffer()} is not needed after reset. */ public void reset() { reset(true, true); } /** * Expert: Resets the pool to its initial state reusing the first buffer. Calling * {@link ByteBlockPool#nextBuffer()} is not needed after reset. * @param zeroFillBuffers if <code>true</code> the buffers are filled with <tt>0</tt>. * This should be set to <code>true</code> if this pool is used with slices. * @param reuseFirst if <code>true</code> the first buffer will be reused and calling * {@link ByteBlockPool#nextBuffer()} is not needed after reset iff the * block pool was used before ie. {@link ByteBlockPool#nextBuffer()} was called before. */ public void reset(boolean zeroFillBuffers, boolean reuseFirst) { if (bufferUpto != -1) { // We allocated at least one buffer if (zeroFillBuffers) { for(int i=0;i<bufferUpto;i++) { // Fully zero fill buffers that we fully used Arrays.fill(buffers[i], (byte) 0); } // Partial zero fill the final buffer Arrays.fill(buffers[bufferUpto], 0, byteUpto, (byte) 0); } if (bufferUpto > 0 || !reuseFirst) { final int offset = reuseFirst ? 1 : 0; // Recycle all but the first buffer allocator.recycleByteBlocks(buffers, offset, 1+bufferUpto); Arrays.fill(buffers, offset, 1+bufferUpto, null); } if (reuseFirst) { // Re-use the first buffer bufferUpto = 0; byteUpto = 0; byteOffset = 0; buffer = buffers[0]; } else { bufferUpto = -1; byteUpto = BYTE_BLOCK_SIZE; byteOffset = -BYTE_BLOCK_SIZE; buffer = null; } } } /** * Advances the pool to its next buffer. This method should be called once * after the constructor to initialize the pool. In contrast to the * constructor a {@link ByteBlockPool#reset()} call will advance the pool to * its first buffer immediately. */ public void nextBuffer() { if (1+bufferUpto == buffers.length) { byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length+1, NUM_BYTES_OBJECT_REF)][]; System.arraycopy(buffers, 0, newBuffers, 0, buffers.length); buffers = newBuffers; } buffer = buffers[1+bufferUpto] = allocator.getByteBlock(); bufferUpto++; byteUpto = 0; byteOffset += BYTE_BLOCK_SIZE; } /** * Allocates a new slice with the given size. * @see ByteBlockPool#FIRST_LEVEL_SIZE */ public int newSlice(final int size) { if (byteUpto > BYTE_BLOCK_SIZE-size) nextBuffer(); final int upto = byteUpto; byteUpto += size; buffer[byteUpto-1] = 16; return upto; } // Size of each slice. These arrays should be at most 16 // elements (index is encoded with 4 bits). First array // is just a compact way to encode X+1 with a max. Second // array is the length of each slice, ie first slice is 5 // bytes, next slice is 14 bytes, etc. /** * An array holding the offset into the {@link ByteBlockPool#LEVEL_SIZE_ARRAY} * to quickly navigate to the next slice level. */ public final static int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9}; /** * An array holding the level sizes for byte slices. */ public final static int[] LEVEL_SIZE_ARRAY = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200}; /** * The first level size for new slices * @see ByteBlockPool#newSlice(int) */ public final static int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0]; /** * Creates a new byte slice with the given starting size and * returns the slices offset in the pool. */ public int allocSlice(final byte[] slice, final int upto) { final int level = slice[upto] & 15; final int newLevel = NEXT_LEVEL_ARRAY[level]; final int newSize = LEVEL_SIZE_ARRAY[newLevel]; // Maybe allocate another block if (byteUpto > BYTE_BLOCK_SIZE-newSize) { nextBuffer(); } final int newUpto = byteUpto; final int offset = newUpto + byteOffset; byteUpto += newSize; // Copy forward the past 3 bytes (which we are about // to overwrite with the forwarding address): buffer[newUpto] = slice[upto-3]; buffer[newUpto+1] = slice[upto-2]; buffer[newUpto+2] = slice[upto-1]; // Write forwarding address at end of last slice: slice[upto-3] = (byte) (offset >>> 24); slice[upto-2] = (byte) (offset >>> 16); slice[upto-1] = (byte) (offset >>> 8); slice[upto] = (byte) offset; // Write new level: buffer[byteUpto-1] = (byte) (16|newLevel); return newUpto+3; } /** Fill the provided {@link BytesRef} with the bytes at the specified offset/length slice. * This will avoid copying the bytes, if the slice fits into a single block; otherwise, it uses * the provided {@link BytesRefBuilder} to copy bytes over. */ void setBytesRef(BytesRefBuilder builder, BytesRef result, long offset, int length) { result.length = length; int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT); byte[] buffer = buffers[bufferIndex]; int pos = (int) (offset & BYTE_BLOCK_MASK); if (pos + length <= BYTE_BLOCK_SIZE) { // common case where the slice lives in a single block: just reference the buffer directly without copying result.bytes = buffer; result.offset = pos; } else { // uncommon case: the slice spans at least 2 blocks, so we must copy the bytes: builder.grow(length); result.bytes = builder.get().bytes; result.offset = 0; readBytes(offset, result.bytes, 0, length); } } // Fill in a BytesRef from term's length & bytes encoded in // byte block public void setBytesRef(BytesRef term, int textStart) { final byte[] bytes = term.bytes = buffers[textStart >> BYTE_BLOCK_SHIFT]; int pos = textStart & BYTE_BLOCK_MASK; if ((bytes[pos] & 0x80) == 0) { // length is 1 byte term.length = bytes[pos]; term.offset = pos+1; } else { // length is 2 bytes term.length = (bytes[pos]&0x7f) + ((bytes[pos+1]&0xff)<<7); term.offset = pos+2; } assert term.length >= 0; } /** * Appends the bytes in the provided {@link BytesRef} at * the current position. */ public void append(final BytesRef bytes) { int bytesLeft = bytes.length; int offset = bytes.offset; while (bytesLeft > 0) { int bufferLeft = BYTE_BLOCK_SIZE - byteUpto; if (bytesLeft < bufferLeft) { // fits within current buffer System.arraycopy(bytes.bytes, offset, buffer, byteUpto, bytesLeft); byteUpto += bytesLeft; break; } else { // fill up this buffer and move to next one if (bufferLeft > 0) { System.arraycopy(bytes.bytes, offset, buffer, byteUpto, bufferLeft); } nextBuffer(); bytesLeft -= bufferLeft; offset += bufferLeft; } } } /** * Reads bytes bytes out of the pool starting at the given offset with the given * length into the given byte array at offset <tt>off</tt>. * <p>Note: this method allows to copy across block boundaries.</p> */ public void readBytes(final long offset, final byte bytes[], int bytesOffset, int bytesLength) { int bytesLeft = bytesLength; int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT); int pos = (int) (offset & BYTE_BLOCK_MASK); while (bytesLeft > 0) { byte[] buffer = buffers[bufferIndex++]; int chunk = Math.min(bytesLeft, BYTE_BLOCK_SIZE - pos); System.arraycopy(buffer, pos, bytes, bytesOffset, chunk); bytesOffset += chunk; bytesLeft -= chunk; pos = 0; } } /** * Set the given {@link BytesRef} so that its content is equal to the * {@code ref.length} bytes starting at {@code offset}. Most of the time this * method will set pointers to internal data-structures. However, in case a * value crosses a boundary, a fresh copy will be returned. * On the contrary to {@link #setBytesRef(BytesRef, int)}, this does not * expect the length to be encoded with the data. */ public void setRawBytesRef(BytesRef ref, final long offset) { int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT); int pos = (int) (offset & BYTE_BLOCK_MASK); if (pos + ref.length <= BYTE_BLOCK_SIZE) { ref.bytes = buffers[bufferIndex]; ref.offset = pos; } else { ref.bytes = new byte[ref.length]; ref.offset = 0; readBytes(offset, ref.bytes, 0, ref.length); } } /** Read a single byte at the given {@code offset}. */ public byte readByte(long offset) { int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT); int pos = (int) (offset & BYTE_BLOCK_MASK); byte[] buffer = buffers[bufferIndex]; return buffer[pos]; } }