package me.lemire.integercompression; import me.lemire.integercompression.differential.*; import java.util.*; import org.junit.Test; /** * * */ public class ExampleTest { /** * */ @Test public void superSimpleExample() { IntegratedIntCompressor iic = new IntegratedIntCompressor(); int[] data = new int[2342351]; for (int k = 0; k < data.length; ++k) data[k] = k; System.out.println("Compressing " + data.length + " integers using friendly interface"); int[] compressed = iic.compress(data); int[] recov = iic.uncompress(compressed); System.out .println("compressed from " + data.length * 4 / 1024 + "KB to " + compressed.length * 4 / 1024 + "KB"); if (!Arrays.equals(recov, data)) throw new RuntimeException("bug"); } /** * */ @Test public void basicExample() { int[] data = new int[2342351]; System.out.println("Compressing " + data.length + " integers in one go"); // data should be sorted for best // results for (int k = 0; k < data.length; ++k) data[k] = k; // Very important: the data is in sorted order!!! If not, you // will get very poor compression with IntegratedBinaryPacking, // you should use another CODEC. // next we compose a CODEC. Most of the processing // will be done with binary packing, and leftovers will // be processed using variable byte IntegratedIntegerCODEC codec = new IntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()); // output vector should be large enough... int[] compressed = new int[data.length + 1024]; // compressed might not be large enough in some cases // if you get java.lang.ArrayIndexOutOfBoundsException, try // allocating more memory /** * * compressing * */ IntWrapper inputoffset = new IntWrapper(0); IntWrapper outputoffset = new IntWrapper(0); codec.compress(data, inputoffset, data.length, compressed, outputoffset); // got it! // inputoffset should be at data.length but outputoffset tells // us where we are... System.out.println( "compressed from " + data.length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB"); // we can repack the data: (optional) compressed = Arrays.copyOf(compressed, outputoffset.intValue()); /** * * now uncompressing * * This assumes that we otherwise know how many integers have been * compressed. See basicExampleHeadless for a more general case. */ int[] recovered = new int[data.length]; IntWrapper recoffset = new IntWrapper(0); codec.uncompress(compressed, new IntWrapper(0), compressed.length, recovered, recoffset); if (Arrays.equals(data, recovered)) System.out.println("data is recovered without loss"); else throw new RuntimeException("bug"); // could use assert System.out.println(); } /** * Like the basicExample, but we store the input array size manually. */ @Test public void basicExampleHeadless() { int[] data = new int[2342351]; System.out.println("Compressing " + data.length + " integers in one go using the headless approach"); // data should be sorted for best // results for (int k = 0; k < data.length; ++k) data[k] = k; // Very important: the data is in sorted order!!! If not, you // will get very poor compression with IntegratedBinaryPacking, // you should use another CODEC. // next we compose a CODEC. Most of the processing // will be done with binary packing, and leftovers will // be processed using variable byte SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()); // output vector should be large enough... int[] compressed = new int[data.length + 1024]; // compressed might not be large enough in some cases // if you get java.lang.ArrayIndexOutOfBoundsException, try // allocating more memory /** * * compressing * */ IntWrapper inputoffset = new IntWrapper(0); IntWrapper outputoffset = new IntWrapper(1); compressed[0] = data.length; // we manually store how many integers we codec.headlessCompress(data, inputoffset, data.length, compressed, outputoffset, new IntWrapper(0)); // got it! // inputoffset should be at data.length but outputoffset tells // us where we are... System.out.println( "compressed from " + data.length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB"); // we can repack the data: (optional) compressed = Arrays.copyOf(compressed, outputoffset.intValue()); /** * * now uncompressing * */ int howmany = compressed[0];// we manually stored the number of // compressed integers int[] recovered = new int[howmany]; IntWrapper recoffset = new IntWrapper(0); codec.headlessUncompress(compressed, new IntWrapper(1), compressed.length, recovered, recoffset, howmany, new IntWrapper(0)); if (Arrays.equals(data, recovered)) System.out.println("data is recovered without loss"); else throw new RuntimeException("bug"); // could use assert System.out.println(); } /** * This is an example to show you can compress unsorted integers as long as * most are small. */ @Test public void unsortedExample() { final int N = 1333333; int[] data = new int[N]; // initialize the data (most will be small for (int k = 0; k < N; k += 1) data[k] = 3; // throw some larger values for (int k = 0; k < N; k += 5) data[k] = 100; for (int k = 0; k < N; k += 533) data[k] = 10000; int[] compressed = new int[N + 1024];// could need more IntegerCODEC codec = new Composition(new FastPFOR(), new VariableByte()); // compressing IntWrapper inputoffset = new IntWrapper(0); IntWrapper outputoffset = new IntWrapper(0); codec.compress(data, inputoffset, data.length, compressed, outputoffset); System.out.println("compressed unsorted integers from " + data.length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB"); // we can repack the data: (optional) compressed = Arrays.copyOf(compressed, outputoffset.intValue()); int[] recovered = new int[N]; IntWrapper recoffset = new IntWrapper(0); codec.uncompress(compressed, new IntWrapper(0), compressed.length, recovered, recoffset); if (Arrays.equals(data, recovered)) System.out.println("data is recovered without loss"); else throw new RuntimeException("bug"); // could use assert System.out.println(); } /** * This is like the basic example, but we show how to process larger arrays * in chunks. * * Some of this code was written by Pavel Klinov. */ @Test public void advancedExample() { int TotalSize = 2342351; // some arbitrary number int ChunkSize = 16384; // size of each chunk, choose a multiple of 128 System.out.println("Compressing " + TotalSize + " integers using chunks of " + ChunkSize + " integers (" + ChunkSize * 4 / 1024 + "KB)"); System.out.println("(It is often better for applications to work in chunks fitting in CPU cache.)"); int[] data = new int[TotalSize]; // data should be sorted for best // results for (int k = 0; k < data.length; ++k) data[k] = k; // next we compose a CODEC. Most of the processing // will be done with binary packing, and leftovers will // be processed using variable byte, using variable byte // only for the last chunk! IntegratedIntegerCODEC regularcodec = new IntegratedBinaryPacking(); IntegratedVariableByte ivb = new IntegratedVariableByte(); IntegratedIntegerCODEC lastcodec = new IntegratedComposition(regularcodec, ivb); // output vector should be large enough... int[] compressed = new int[TotalSize + 1024]; /** * * compressing * */ IntWrapper inputoffset = new IntWrapper(0); IntWrapper outputoffset = new IntWrapper(0); for (int k = 0; k < TotalSize / ChunkSize; ++k) regularcodec.compress(data, inputoffset, ChunkSize, compressed, outputoffset); lastcodec.compress(data, inputoffset, TotalSize % ChunkSize, compressed, outputoffset); // got it! // inputoffset should be at data.length but outputoffset tells // us where we are... System.out.println( "compressed from " + data.length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB"); // we can repack the data: compressed = Arrays.copyOf(compressed, outputoffset.intValue()); /** * * now uncompressing * * We are *not* assuming that the original array length is known, * however we assume that the chunk size (ChunkSize) is known. * */ int[] recovered = new int[ChunkSize]; IntWrapper compoff = new IntWrapper(0); IntWrapper recoffset; int currentpos = 0; while (compoff.get() < compressed.length) { recoffset = new IntWrapper(0); regularcodec.uncompress(compressed, compoff, compressed.length - compoff.get(), recovered, recoffset); if (recoffset.get() < ChunkSize) {// last chunk detected ivb.uncompress(compressed, compoff, compressed.length - compoff.get(), recovered, recoffset); } for (int i = 0; i < recoffset.get(); ++i) { if (data[currentpos + i] != recovered[i]) throw new RuntimeException("bug"); // could use assert } currentpos += recoffset.get(); } System.out.println("data is recovered without loss"); System.out.println(); } /** * Demo of the headless approach where we must supply the array length */ @Test public void headlessDemo() { System.out.println("Compressing arrays with minimal header..."); int[] uncompressed1 = { 1, 2, 1, 3, 1 }; int[] uncompressed2 = { 3, 2, 4, 6, 1 }; int[] compressed = new int[uncompressed1.length + uncompressed2.length + 1024]; SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); // compressing IntWrapper outPos = new IntWrapper(); IntWrapper previous = new IntWrapper(); codec.headlessCompress(uncompressed1, new IntWrapper(), uncompressed1.length, compressed, outPos); int length1 = outPos.get() - previous.get(); previous = new IntWrapper(outPos.get()); codec.headlessCompress(uncompressed2, new IntWrapper(), uncompressed2.length, compressed, outPos); int length2 = outPos.get() - previous.get(); compressed = Arrays.copyOf(compressed, length1 + length2); System.out .println("compressed unsorted integers from " + uncompressed1.length * 4 + "B to " + length1 * 4 + "B"); System.out .println("compressed unsorted integers from " + uncompressed2.length * 4 + "B to " + length2 * 4 + "B"); System.out.println("Total compressed output " + compressed.length); int[] recovered1 = new int[uncompressed1.length]; int[] recovered2 = new int[uncompressed1.length]; IntWrapper inPos = new IntWrapper(); System.out.println("Decoding first array starting at pos = " + inPos); codec.headlessUncompress(compressed, inPos, compressed.length, recovered1, new IntWrapper(0), uncompressed1.length); System.out.println("Decoding second array starting at pos = " + inPos); codec.headlessUncompress(compressed, inPos, compressed.length, recovered2, new IntWrapper(0), uncompressed2.length); if (!Arrays.equals(uncompressed1, recovered1)) throw new RuntimeException("First array does not match."); if (!Arrays.equals(uncompressed2, recovered2)) throw new RuntimeException("Second array does not match."); System.out.println("The arrays match, your code is probably ok."); } }