package ivory.bloomir.data;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import com.google.common.base.Preconditions;
import com.kamikaze.pfordelta.PForDelta;
import org.apache.hadoop.io.Writable;
/**
* A compressed postings list representation. This class uses
* PForDelta to compress a given set of document ids into
* fixed-size blocks; only the last block may hold fewer elements.
*
* @author Nima Asadi
*/
public class CompressedPostings implements Writable {
  /**
   * Number of document ids stored in every block except possibly the last.
   * This value is shared by all instances and is NOT serialized by
   * {@link #write}, so data must be read back under the same block size
   * that was in effect when it was compressed.
   */
  private static int blockSize = 128;

  /** PForDelta-compressed blocks; one inner array per block. */
  private int[][] compressedBlocks;

  /** Number of elements held by the final block (0 when there are no blocks). */
  private int lastBlockSize;

  protected CompressedPostings() {
  }

  /**
   * Changes the block size used by all instances of this class.
   *
   * @param bSize New block size; must be strictly positive.
   * @throws IllegalArgumentException if {@code bSize} is not positive.
   */
  protected static void setBlockSize(int bSize) {
    Preconditions.checkArgument(bSize > 0);
    blockSize = bSize;
  }

  /**
   * Compresses the given input and generates a new
   * instance of this class.
   *
   * @param data Array of sorted document ids (may be empty)
   * @return an instance of this class which contains the
   * given data in a compressed format.
   */
  public static CompressedPostings newInstance(int[] data) {
    Preconditions.checkNotNull(data);
    CompressedPostings postings = new CompressedPostings();
    postings.compressData(data);
    return postings;
  }

  /**
   * Delta-encodes and compresses the given data block by block, replacing
   * whatever this instance held before. The first value of every block is
   * stored as-is; each following value is stored as the difference to its
   * predecessor.
   *
   * @param data Array of sorted document ids. An empty array produces a
   * postings list with zero blocks.
   */
  protected void compressData(int[] data) {
    Preconditions.checkNotNull(data);

    // Nothing to compress: the general path below would otherwise index
    // data at a negative offset when computing the last block.
    if (data.length == 0) {
      compressedBlocks = new int[0][];
      lastBlockSize = 0;
      return;
    }

    // Read the shared block size once so a single call works on one value.
    final int size = blockSize;
    final int nbBlocks = data.length / size + (data.length % size == 0 ? 0 : 1);
    final int fullBlocks = nbBlocks - 1;
    final int[][] blocks = new int[nbBlocks][];

    // Compress all blocks except for the last block, which might
    // contain fewer elements. The delta buffer is reused across blocks.
    if (fullBlocks > 0) {
      final int[] deltas = new int[size];
      for (int i = 0; i < fullBlocks; i++) {
        fillDeltas(data, i * size, deltas);
        blocks[i] = PForDelta.compressOneBlockOpt(deltas, size);
      }
    }

    // Compress the last block using a buffer of exactly its length.
    final int lastStart = fullBlocks * size;
    final int remaining = data.length - lastStart;
    final int[] lastDeltas = new int[remaining];
    fillDeltas(data, lastStart, lastDeltas);
    blocks[fullBlocks] = PForDelta.compressOneBlockOpt(lastDeltas, remaining);

    compressedBlocks = blocks;
    lastBlockSize = remaining;
  }

  /**
   * Fills {@code out} with the delta encoding of {@code out.length} values
   * of {@code data} starting at {@code start}: the first slot receives the
   * raw value, every other slot the gap to the previous value.
   */
  private static void fillDeltas(int[] data, int start, int[] out) {
    out[0] = data[start];
    for (int j = 1; j < out.length; j++) {
      out[j] = data[start + j] - data[start + j - 1];
    }
  }

  /**
   * Decompresses a block, stores the decompressed data in an array and returns
   * the number of decompressed elements as output.
   *
   * The first decompressed value is an original value; every following
   * value is the gap to its predecessor. That is:
   *
   * <pre>
   * original_data[0] = outBlock[0]<br>
   * original_data[1] = original_data[0] + outBlock[1]<br>
   * original_data[2] = original_data[1] + outBlock[2]<br>
   * ...<br>
   * original_data[n-1] = original_data[n-2] + outBlock[n-1]<br>
   * </pre>
   *
   * @param outBlock Array to store the decompressed values. Note that the size
   * of this array must be at least equal to the size of each block.
   * @param blockNumber The block index to decompress.
   * @return Number of elements in the decompressed array, starting from index 0.
   * @throws IllegalArgumentException if {@code blockNumber} is out of range.
   */
  public int decompressBlock(int[] outBlock, int blockNumber) {
    Preconditions.checkNotNull(outBlock);
    Preconditions.checkArgument(blockNumber >= 0 && blockNumber < compressedBlocks.length);

    // Only the final block may be shorter than the shared block size.
    final boolean isLastBlock = (blockNumber == compressedBlocks.length - 1);
    final int count = isLastBlock ? lastBlockSize : blockSize;
    PForDelta.decompressOneBlock(outBlock, compressedBlocks[blockNumber], count);
    return count;
  }

  /**
   * @return The number of blocks in this postings list.
   */
  public int getBlockCount() {
    return compressedBlocks.length;
  }

  /**
   * @return The actual block size.
   */
  public static int getBlockSize() {
    return blockSize;
  }

  /**
   * Computes the block number of a given index. For efficiency
   * purposes we do not check the range of the input index against
   * the number of elements in this postings list.
   *
   * @param index Index of an element in the original data
   * @return Block number.
   * @throws IllegalArgumentException if {@code index} is negative.
   */
  public int getBlockNumber(int index) {
    Preconditions.checkArgument(index >= 0);
    // Integer division already floors for non-negative operands.
    return index / blockSize;
  }

  /**
   * Returns the index (in the original data) of the
   * first element in the compressed block.
   *
   * @param blockNumber Block index.
   * @return Index of the first value in the specified block.
   * @throws IllegalArgumentException if {@code blockNumber} is out of range.
   */
  public int getBlockStartIndex(int blockNumber) {
    Preconditions.checkArgument(blockNumber >= 0 && blockNumber < compressedBlocks.length);
    return blockNumber * blockSize;
  }

  /**
   * @param index Index of an element in the original data
   * @return Whether or not this element is the first element of a
   * block. For more information on how to read the elements
   * please refer to {@link #decompressBlock}
   */
  public boolean isFirstElementInBlock(int index) {
    Preconditions.checkArgument(index >= 0);
    return (getPositionInBlock(index) == 0);
  }

  /**
   * @param index Index of an element in the original data
   * @return Index of the element in the block.
   */
  public int getPositionInBlock(int index) {
    Preconditions.checkArgument(index >= 0);
    return index % blockSize;
  }

  /**
   * Reads an instance of this class from a given input.
   *
   * @param input Data input
   * @return An instance of this class.
   * @throws IOException if reading fails or the input is malformed.
   */
  public static CompressedPostings readInstance(DataInput input) throws IOException {
    Preconditions.checkNotNull(input);
    CompressedPostings postings = new CompressedPostings();
    postings.readFields(input);
    return postings;
  }

  /**
   * Restores this instance from the layout produced by {@link #write}:
   * last block size, number of blocks, then for every block its length
   * followed by its values. The instance is only modified once the whole
   * record has been read successfully.
   *
   * @param input Data input
   * @throws IOException if reading fails or a stored size is negative.
   */
  @Override
  public void readFields(DataInput input) throws IOException {
    Preconditions.checkNotNull(input);

    final int lastSize = input.readInt();
    if (lastSize < 0) {
      throw new IOException("Negative last block size: " + lastSize);
    }
    final int blockCount = input.readInt();
    if (blockCount < 0) {
      throw new IOException("Negative block count: " + blockCount);
    }

    final int[][] blocks = new int[blockCount][];
    for (int i = 0; i < blockCount; i++) {
      final int length = input.readInt();
      if (length < 0) {
        throw new IOException("Negative length " + length + " for block " + i);
      }
      final int[] block = new int[length];
      for (int j = 0; j < length; j++) {
        block[j] = input.readInt();
      }
      blocks[i] = block;
    }

    // Publish the new state only after everything was read.
    lastBlockSize = lastSize;
    compressedBlocks = blocks;
  }

  /**
   * Serializes this instance: last block size, number of blocks, then for
   * every block its length followed by its values. The shared block size
   * is not written.
   *
   * @param output Data output
   * @throws IOException if writing fails.
   */
  @Override
  public void write(DataOutput output) throws IOException {
    Preconditions.checkNotNull(output);
    output.writeInt(lastBlockSize);
    output.writeInt(compressedBlocks.length);
    for (int[] block : compressedBlocks) {
      output.writeInt(block.length);
      for (int value : block) {
        output.writeInt(value);
      }
    }
  }

  /**
   * Two postings lists are equal when they have the same last block size
   * and identical compressed blocks.
   *
   * @param o Object to compare against; may be {@code null}.
   * @return {@code true} if {@code o} is a {@code CompressedPostings} with
   * the same content, {@code false} otherwise (including for {@code null}
   * and for objects of other types).
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    // Per the Object.equals contract, null and foreign types compare
    // unequal instead of raising an exception.
    if (!(o instanceof CompressedPostings)) {
      return false;
    }
    CompressedPostings other = (CompressedPostings) o;
    return this.lastBlockSize == other.lastBlockSize
        && Arrays.deepEquals(this.compressedBlocks, other.compressedBlocks);
  }

  /**
   * @return A hash code derived from the same fields that
   * {@link #equals} compares.
   */
  @Override
  public int hashCode() {
    return 31 * lastBlockSize + Arrays.deepHashCode(compressedBlocks);
  }
}