/** * This code is released under the * Apache License Version 2.0 http://www.apache.org/licenses/. * * (c) Daniel Lemire, http://lemire.me/en/ */ package me.lemire.integercompression; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.IntBuffer; /** * Implementation of variable-byte. For best performance, use it using the * ByteIntegerCODEC interface. * * Note that this does not use differential coding: if you are working on sorted * lists, you must compute the deltas separately. * * @author Daniel Lemire */ public class VariableByte implements IntegerCODEC, ByteIntegerCODEC, SkippableIntegerCODEC { private static byte extract7bits(int i, long val) { return (byte) ((val >> (7 * i)) & ((1 << 7) - 1)); } private static byte extract7bitsmaskless(int i, long val) { return (byte) ((val >> (7 * i))); } @Override public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { headlessCompress(in, inpos, inlength, out, outpos); } @Override public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { if (inlength == 0) return; ByteBuffer buf = makeBuffer(inlength * 8); buf.order(ByteOrder.LITTLE_ENDIAN); for (int k = inpos.get(); k < inpos.get() + inlength; ++k) { final long val = in[k] & 0xFFFFFFFFL; // To be consistent with // unsigned integers in C/C++ if (val < (1 << 7)) { buf.put((byte) (val | (1 << 7))); } else if (val < (1 << 14)) { buf.put((byte) extract7bits(0, val)); buf.put((byte) (extract7bitsmaskless(1, (val)) | (1 << 7))); } else if (val < (1 << 21)) { buf.put((byte) extract7bits(0, val)); buf.put((byte) extract7bits(1, val)); buf.put((byte) (extract7bitsmaskless(2, (val)) | (1 << 7))); } else if (val < (1 << 28)) { buf.put((byte) extract7bits(0, val)); buf.put((byte) extract7bits(1, val)); buf.put((byte) extract7bits(2, val)); buf.put((byte) (extract7bitsmaskless(3, (val)) | (1 << 7))); } else { buf.put((byte) extract7bits(0, val)); buf.put((byte) extract7bits(1, val)); buf.put((byte) extract7bits(2, val)); buf.put((byte) extract7bits(3, val)); buf.put((byte) (extract7bitsmaskless(4, (val)) | (1 << 7))); } } while (buf.position() % 4 != 0) buf.put((byte) 0); final int length = buf.position(); buf.flip(); IntBuffer ibuf = buf.asIntBuffer(); ibuf.get(out, outpos.get(), length / 4); outpos.add(length / 4); inpos.add(inlength); } @Override public void compress(int[] in, IntWrapper inpos, int inlength, byte[] out, IntWrapper outpos) { if (inlength == 0) return; int outpostmp = outpos.get(); for (int k = inpos.get(); k < inpos.get() + inlength; ++k) { final long val = in[k] & 0xFFFFFFFFL; // To be consistent with // unsigned integers in C/C++ if (val < (1 << 7)) { out[outpostmp++] = (byte) (val | (1 << 7)); } else if (val < (1 << 14)) { out[outpostmp++] = (byte) extract7bits(0, val); out[outpostmp++] = (byte) (extract7bitsmaskless(1, (val)) | (1 << 7)); } else if (val < (1 << 21)) { out[outpostmp++] = (byte) extract7bits(0, val); out[outpostmp++] = (byte) extract7bits(1, val); out[outpostmp++] = (byte) (extract7bitsmaskless(2, (val)) | (1 << 7)); } else if (val < (1 << 28)) { out[outpostmp++] = (byte) extract7bits(0, val); out[outpostmp++] = (byte) extract7bits(1, val); out[outpostmp++] = (byte) extract7bits(2, val); out[outpostmp++] = (byte) (extract7bitsmaskless(3, (val)) | (1 << 7)); } else { out[outpostmp++] = (byte) extract7bits(0, val); out[outpostmp++] = (byte) extract7bits(1, val); out[outpostmp++] = (byte) extract7bits(2, val); out[outpostmp++] = (byte) extract7bits(3, val); out[outpostmp++] = (byte) (extract7bitsmaskless(4, (val)) | (1 << 7)); } } outpos.set(outpostmp); inpos.add(inlength); } @Override public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { int s = 0; int val = 0; int p = inpos.get(); int finalp = inpos.get() + inlength; int tmpoutpos = outpos.get(); for (int v = 0, shift = 0; p < finalp;) { val = in[p]; int c = (byte) (val >>> s); s += 8; p += s>>5; s = s & 31; v += ((c & 127) << shift); if ((c & 128) == 128) { out[tmpoutpos++] = v; v = 0; shift = 0; } else shift += 7; } outpos.set(tmpoutpos); inpos.add(inlength); } @Override public void uncompress(byte[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { int p = inpos.get(); int finalp = inpos.get() + inlength; int tmpoutpos = outpos.get(); for (int v = 0; p < finalp; out[tmpoutpos++] = v) { v = in[p] & 0x7F; if (in[p] < 0) { p += 1; continue; } v = ((in[p + 1] & 0x7F) << 7) | v; if (in[p + 1] < 0) { p += 2; continue; } v = ((in[p + 2] & 0x7F) << 14) | v; if (in[p + 2] < 0 ) { p += 3; continue; } v = ((in[p + 3] & 0x7F) << 21) | v; if (in[p + 3] < 0) { p += 4; continue; } v = ((in[p + 4] & 0x7F) << 28) | v; p += 5; } outpos.set(tmpoutpos); inpos.add(p); } @Override public String toString() { return this.getClass().getSimpleName(); } @Override public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num) { int s = 0; int val = 0; int p = inpos.get(); int tmpoutpos = outpos.get(); int finaloutpos = num + tmpoutpos; for (int v = 0, shift = 0; tmpoutpos < finaloutpos;) { val = in[p]; int c = val >>> s; s += 8; p += s>>5; s = s & 31; v += ((c & 127) << shift); if ((c & 128) == 128) { out[tmpoutpos++] = v; v = 0; shift = 0; } else shift += 7; } outpos.set(tmpoutpos); inpos.set(p + (s!=0 ? 1 : 0)); } /** * Creates a new buffer of the requested size. * * In case you need a different way to allocate buffers, you can override this method * with a custom behavior. The default implementation allocates a new Java direct * {@link ByteBuffer} on each invocation. */ protected ByteBuffer makeBuffer(int sizeInBytes) { return ByteBuffer.allocateDirect(sizeInBytes); } }