package FlexibleEncoding.Parquet;
/*
 * Adapted from Parquet.
 */
//import static parquet.Log.DEBUG;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * Decodes values written in the grammar described in {@link RunLengthBitPackingHybridEncoder}.
 *
 * <p>The input is consumed one run at a time. Each run starts with an unsigned
 * var-int header whose lowest bit selects the run type (0 = RLE, 1 = bit-packed)
 * and whose remaining bits carry the run length: a number of repetitions for an
 * RLE run, a number of 8-value groups for a bit-packed run.
 *
 * <p>Instances hold mutable decoding state and perform no synchronization.
 *
 * @author Julien Le Dem
 */
public class RunLengthBitPackingHybridDecoder {
  private static final Log LOG = Log.getLog(RunLengthBitPackingHybridDecoder.class);

  /** Kind of the run currently being consumed. */
  private enum MODE { RLE, PACKED }

  /** Width, in bits, of every encoded value (0 to 32 inclusive). */
  private final int bitWidth;
  /** Unpacks groups of 8 values from a bit-packed run. */
  private final BytePacker packer;
  /** Source of the encoded bytes. */
  private final ByteArrayInputStream in;

  /** Type of the current run; assigned by {@link #readNext()}. */
  private MODE mode;
  /** Number of values of the current run that have not been returned yet. */
  private int currentCount;
  /** The repeated value of the current run when {@code mode} is RLE. */
  private int currentValue;
  /** The unpacked values of the current run when {@code mode} is PACKED. */
  private int[] currentBuffer;

  /**
   * Creates a decoder reading values of the given width from {@code in}.
   *
   * @param bitWidth the number of bits used per value, between 0 and 32 inclusive
   * @param in the stream holding the encoded runs
   * @throws IllegalArgumentException presumably, via {@code Preconditions.checkArgument},
   *         when {@code bitWidth} is out of range (the helper is declared outside this file)
   */
  public RunLengthBitPackingHybridDecoder(int bitWidth, ByteArrayInputStream in) {
    if (Log.DEBUG) LOG.debug("decoding bitWidth " + bitWidth);
    Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");
    this.bitWidth = bitWidth;
    this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
    this.in = in;
  }

  /**
   * Returns the next decoded value, loading the next run from the stream when
   * the current one is exhausted.
   *
   * @return the next value
   * @throws IOException if reading the underlying stream fails
   */
  public int readInt() throws IOException {
    // A header may announce an empty run (length 0). Keep loading until a run
    // actually holds a value; otherwise the decrement below would push the
    // counter negative and the decoder would never read another header.
    while (currentCount == 0) {
      readNext();
    }
    --currentCount;
    switch (mode) {
      case RLE:
        return currentValue;
      case PACKED:
        // currentCount counts down, so this walks the buffer front to back.
        return currentBuffer[currentBuffer.length - 1 - currentCount];
      default:
        throw new ParquetDecodingException("not a valid mode " + mode);
    }
  }

  /**
   * Reads the next run header and loads that run: the repeated value for an
   * RLE run, or all unpacked values for a bit-packed run.
   *
   * @throws IOException if reading the underlying stream fails
   */
  private void readNext() throws IOException {
    Preconditions.checkArgument(in.available() > 0, "Reading past RLE/BitPacking stream.");
    final int header = BytesUtils.readUnsignedVarInt(in);
    // Lowest header bit selects the run type; the remaining bits are the length.
    mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED;
    switch (mode) {
      case RLE:
        currentCount = header >>> 1;
        if (Log.DEBUG) LOG.debug("reading " + currentCount + " values RLE");
        currentValue = BytesUtils.readIntLittleEndianPaddedOnBitWidth(in, bitWidth);
        break;
      case PACKED:
        int numGroups = header >>> 1;
        currentCount = numGroups * 8;
        if (Log.DEBUG) LOG.debug("reading " + currentCount + " values BIT PACKED");
        currentBuffer = new int[currentCount]; // TODO: reuse a buffer
        // 8 values of bitWidth bits occupy exactly bitWidth bytes, so the run
        // spans numGroups * bitWidth bytes. Computing it this way avoids the
        // int overflow of (numGroups * 8) * bitWidth and any floating point.
        int packedLength = numGroups * bitWidth;
        byte[] bytes = new byte[packedLength];
        // At the end of the file RLE data though, there might not be that many bytes left.
        // Bytes that are missing stay zero in the array.
        int bytesToRead = Math.min(packedLength, in.available());
        new DataInputStream(in).readFully(bytes, 0, bytesToRead);
        for (int valueIndex = 0, byteIndex = 0; valueIndex < currentCount; valueIndex += 8, byteIndex += bitWidth) {
          packer.unpack8Values(bytes, byteIndex, currentBuffer, valueIndex);
        }
        break;
      default:
        throw new ParquetDecodingException("not a valid mode " + mode);
    }
  }
}