/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util.packed;

import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.DataOutput;

/**
 * Class for writing packed integers to be directly read from Directory.
 * Integers can be read on-the-fly via {@link DirectReader}.
 * <p>
 * Unlike PackedInts, it optimizes for read i/o operations and supports &gt; 2B values.
 * Example usage:
 * <pre class="prettyprint">
 *   int bitsPerValue = DirectWriter.bitsRequired(100); // values up to and including 100
 *   IndexOutput output = dir.createOutput("packed", IOContext.DEFAULT);
 *   DirectWriter writer = DirectWriter.getInstance(output, numberOfValues, bitsPerValue);
 *   for (int i = 0; i &lt; numberOfValues; i++) {
 *     writer.add(value);
 *   }
 *   writer.finish();
 *   output.close();
 * </pre>
 * @see DirectReader
 */
public final class DirectWriter {
  /** Number of bits used to encode each value. */
  final int bitsPerValue;
  /** Exact number of values that must be added before {@link #finish()} may be called. */
  final long numValues;
  /** Destination of the encoded bytes. */
  final DataOutput output;
  /** Number of values added so far. */
  long count;
  /** Set once {@link #finish()} has completed. */
  boolean finished;

  // for now, just use the existing writer under the hood
  /** Number of values currently buffered in {@link #nextValues}. */
  int off;
  /** Scratch buffer receiving the encoded form of {@link #nextValues}. */
  final byte[] nextBlocks;
  /** Values buffered since the last flush; slots at and beyond {@link #off} are zero. */
  final long[] nextValues;
  final BulkOperation encoder;
  /** Iteration count passed to the encoder on every flush; also sizes both buffers. */
  final int iterations;

  DirectWriter(DataOutput output, long numValues, int bitsPerValue) {
    this.output = output;
    this.numValues = numValues;
    this.bitsPerValue = bitsPerValue;
    encoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
    // computeIterations takes an int value count, so clamp very large counts
    iterations = encoder.computeIterations(
        (int) Math.min(numValues, Integer.MAX_VALUE), PackedInts.DEFAULT_BUFFER_SIZE);
    nextBlocks = new byte[iterations * encoder.byteBlockCount()];
    nextValues = new long[iterations * encoder.byteValueCount()];
  }

  /**
   * Adds a value to this writer
   *
   * @param l the value to append
   * @throws EOFException if {@code numValues} values have already been added
   * @throws IOException if a full buffer could not be written to the output
   */
  public void add(long l) throws IOException {
    assert bitsPerValue == 64 || (l >= 0 && l <= PackedInts.maxValue(bitsPerValue)) : bitsPerValue;
    assert !finished;
    if (count >= numValues) {
      throw new EOFException("Writing past end of stream");
    }
    nextValues[off++] = l;
    if (off == nextValues.length) {
      flush();
    }
    count++;
  }

  /**
   * Encodes the buffered values and writes them to the output, then resets the buffer.
   */
  private void flush() throws IOException {
    // The whole buffer is always encoded; slots past 'off' are zero because the
    // buffer is cleared after every flush.
    encoder.encode(nextValues, 0, nextBlocks, 0, iterations);
    // Only write the byte count that the PACKED format reports for 'off' values.
    final int blockCount =
        (int) PackedInts.Format.PACKED.byteCount(PackedInts.VERSION_CURRENT, off, bitsPerValue);
    output.writeBytes(nextBlocks, blockCount);
    Arrays.fill(nextValues, 0L);
    off = 0;
  }

  /**
   * finishes writing
   *
   * @throws IllegalStateException if the number of values added differs from
   *         {@code numValues}, or if this writer has already been finished
   * @throws IOException if the remaining values or the padding could not be written
   */
  public void finish() throws IOException {
    if (count != numValues) {
      throw new IllegalStateException(
          "Wrong number of values added, expected: " + numValues + ", got: " + count);
    }
    // A real check rather than an assert: a second call would otherwise append
    // another 3 padding bytes to the output when assertions are disabled.
    if (finished) {
      throw new IllegalStateException("already finished");
    }
    flush();
    // pad for fast io: we actually only need this for certain BPV, but it's just 3 bytes...
    for (int i = 0; i < 3; i++) {
      output.writeByte((byte) 0);
    }
    finished = true;
  }

  /**
   * Returns an instance suitable for encoding {@code numValues} using {@code bitsPerValue}
   *
   * @param output the output the packed values are written to
   * @param numValues the exact number of values that will be added
   * @param bitsPerValue the number of bits per value; must be one of the supported
   *        widths, see {@link #bitsRequired(long)}
   * @throws IllegalArgumentException if {@code bitsPerValue} is not supported or
   *         {@code numValues} is negative
   */
  public static DirectWriter getInstance(DataOutput output, long numValues, int bitsPerValue) {
    if (Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) < 0) {
      throw new IllegalArgumentException(
          "Unsupported bitsPerValue " + bitsPerValue + ". Did you use bitsRequired?");
    }
    // A negative count can never be satisfied: add() would always refuse values
    // and finish() would always report a wrong count.
    if (numValues < 0) {
      throw new IllegalArgumentException("numValues must be >= 0, got: " + numValues);
    }
    return new DirectWriter(output, numValues, bitsPerValue);
  }

  /**
   * Round a number of bits per value to the next amount of bits per value that
   * is supported by this writer.
   *
   * @param bitsRequired the amount of bits required
   * @return the next number of bits per value that is gte the provided value
   *         and supported by this writer
   */
  private static int roundBits(int bitsRequired) {
    int index = Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsRequired);
    if (index < 0) {
      // not found: -index-1 is the insertion point, i.e. the next larger supported width
      return SUPPORTED_BITS_PER_VALUE[-index - 1];
    } else {
      return bitsRequired;
    }
  }

  /**
   * Returns how many bits are required to hold values up
   * to and including maxValue
   *
   * @param maxValue the maximum value that should be representable.
   * @return the amount of bits needed to represent values from 0 to maxValue.
   * @see PackedInts#bitsRequired(long)
   */
  public static int bitsRequired(long maxValue) {
    return roundBits(PackedInts.bitsRequired(maxValue));
  }

  /**
   * Returns how many bits are required to hold values up
   * to and including maxValue, interpreted as an unsigned value.
   *
   * @param maxValue the maximum value that should be representable.
   * @return the amount of bits needed to represent values from 0 to maxValue.
   * @see PackedInts#unsignedBitsRequired(long)
   */
  public static int unsignedBitsRequired(long maxValue) {
    return roundBits(PackedInts.unsignedBitsRequired(maxValue));
  }

  /** Bit widths this writer can encode, in ascending order (required by the binary searches). */
  static final int[] SUPPORTED_BITS_PER_VALUE = new int[] {
    1, 2, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64
  };
}