/*
* Terrier - Terabyte Retriever
* Webpage: http://ir.dcs.gla.ac.uk/terrier
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - Department of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is BitOutputStream.java.
*
* The Original Code is Copyright (C) 2004-2008 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
* Roi Blanco
*/
package ivory.core.compression;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.log4j.Logger;
/**
* This class provides methods to write compressed integers to an output stream.<br>
* The numbers are written into a byte starting from the most significant bit (i.e., left to right).
* Bits are first accumulated in a 32-bit integer; each completed integer is copied into an
* internal byte buffer, which is written to the underlying stream when the buffer fills up.
*
* @author Roi Blanco
*
*/
public class BitOutputStream {
  /** The logger for this class. */
  protected static Logger logger = Logger.getRootLogger();
  /** Writing buffer: completed 32-bit words are staged here before reaching the stream. */
  protected byte[] buffer;
  /** Index of the next free position in {@code buffer}. */
  protected int bufferPointer;
  /** Size of the buffer in bytes; it has to be a multiple of 4. */
  protected int bufferSize;
  /** Default size for the buffer, in bytes. */
  protected static final int DEFAULT_SIZE = 16 * 1024;
  /** The output stream used internally. */
  protected DataOutputStream dos = null;
  /** Number of complete 32-bit words copied into the buffer so far. */
  protected long byteOffset;
  /** Number of bits still free in {@code byteToWrite}; 32 means the word is empty. */
  protected int bitOffset;
  /** The 32-bit word currently being filled, starting from its most significant bit. */
  protected int byteToWrite;
  /**
   * Bytes emitted outside of whole 32-bit words: the bytes of partially filled words flushed by
   * {@link #padAndFlush()}, the {@code append} methods and {@link #close()}, plus the raw bytes
   * passed to the {@code append} methods. Needed so that {@link #getByteOffset()} stays exact
   * when the stream stops being a multiple of four bytes long.
   */
  private long unalignedBytes;

  /**
   * Initialises the variables in the stream. Used internally.
   */
  private void init() {
    byteOffset = 0;
    unalignedBytes = 0;
    bitOffset = 32;
    byteToWrite = 0;
    buffer = new byte[DEFAULT_SIZE];
    bufferSize = DEFAULT_SIZE;
    bufferPointer = 0;
  }

  /**
   * Empty constructor. It sets neither the underlying stream nor the buffer, so an instance
   * created this way cannot write anything unless the protected fields are set up elsewhere.
   */
  public BitOutputStream() {}

  /**
   * Constructs an instance of the class for a given OutputStream.
   * @param os the java.io.OutputStream used for writing
   * @throws java.io.IOException if an I/O error occurs
   */
  public BitOutputStream(OutputStream os) throws IOException {
    dos = new DataOutputStream(os);
    init();
  }

  /**
   * Returns the byte offset of the stream.
   * It corresponds to the position of the
   * byte in which the next bit will be written.
   * @return the byte offset in the stream.
   */
  public long getByteOffset() {
    // whole words + bytes emitted outside whole words + complete bytes of the pending word
    return byteOffset * 4 + unalignedBytes + ((32 - bitOffset) / 8);
  }

  /**
   * Returns the bit offset in the last byte.
   * It corresponds to the position in which
   * the next bit will be written.
   * @return the bit offset in the stream.
   */
  public byte getBitOffset() {
    return (byte) ((32 - bitOffset) % 8);
  }

  /**
   * Copies a complete 32-bit word into the buffer (most significant byte first) and, if the
   * buffer becomes full, writes the buffer to the underlying OutputStream.
   * @param writeMe int to be written into the buffer
   * @throws IOException if an I/O error occurs
   */
  private void writeIntBuffer(int writeMe) throws IOException {
    buffer[bufferPointer++] = (byte) (writeMe >>> 24);
    buffer[bufferPointer++] = (byte) (writeMe >>> 16);
    buffer[bufferPointer++] = (byte) (writeMe >>> 8);
    buffer[bufferPointer++] = (byte) writeMe;
    byteOffset++;
    if (bufferPointer == bufferSize) {
      dos.write(buffer, 0, bufferPointer);
      bufferPointer = 0;
    }
  }

  /**
   * Writes the low {@code len} bits of a number into the word currently being filled.
   * The caller must make sure that {@code len} does not exceed the free bits of that word.
   * @param b the number to write
   * @param len the length of the number in bits
   * @return the number of bits written
   * @throws IOException if an I/O error occurs.
   */
  private int writeInCurrent(final int b, final int len) throws IOException {
    // This check is necessary because x << 32 == x and not 0 (shift counts are taken modulo 32).
    if (len > 0) {
      // Keep only the low len bits: any higher bit would be OR-ed onto bits that were
      // already written. (1 << 32) - 1 is 0 in Java, hence the separate len == 32 case.
      final int bits = len < 32 ? b & ((1 << len) - 1) : b;
      bitOffset -= len;
      byteToWrite |= bits << bitOffset;
      if (bitOffset == 0) {
        writeIntBuffer(byteToWrite);
        bitOffset = 32;
        byteToWrite = 0;
      }
    }
    return len;
  }

  /**
   * Writes an integer x using unary encoding. The encoding is a sequence of x - 1 zeros and 1 one:
   * 1, 01, 001, 0001, etc ..
   * This method is not failsafe, it doesn't check if the argument is 0 or negative.
   * @param x the number to write
   * @return the number of bits written
   * @throws IOException if an I/O error occurs.
   */
  public int writeUnary(int x) throws IOException {
    if (bitOffset >= x) {
      return writeInCurrent(1, x);
    }
    // The code does not fit: the free bits of the current word become leading zeros.
    final int shift = bitOffset;
    x -= shift;
    writeIntBuffer(byteToWrite);
    bitOffset = 32;
    byteToWrite = 0;
    // Emit as many all-zero words as the remaining run of zeros spans completely.
    int i = x - 1 >> 5;
    while (i-- != 0) {
      writeIntBuffer(0);
    }
    // Remaining zeros followed by the terminating one.
    writeInCurrent(1, ((x - 1) & 31) + 1);
    return x + shift;
  }

  /**
   * Writes an integer x into the stream using gamma encoding: the value {@code msb + 1} in unary,
   * followed by the low {@code msb} bits of x, where {@code msb} is the result of
   * {@code BitUtilities.mostSignificantBit(x)} (presumably the zero-based index of the highest
   * set bit; that helper is not part of this file).
   * This method is not failsafe, it doesn't check if the argument is 0 or negative.
   * @param x the int number to write
   * @return the number of bits written
   * @throws IOException if an I/O error occurs.
   */
  public int writeGamma(int x) throws IOException {
    final int msb = BitUtilities.mostSignificantBit(x);
    final int l = writeUnary(msb + 1);
    return l + writeInt(x, msb);
  }

  /**
   * Writes an integer x into the stream using delta encoding: x is incremented, the result of
   * {@code BitUtilities.mostSignificantBit(x + 1)} is written in gamma coding and, when that
   * result is not 0, it is followed by that many low bits of {@code x + 1}.
   * This method is not failsafe, it doesn't check if the argument is 0 or negative.
   * @param x the int number to write
   * @return the number of bits written
   * @throws IOException if an I/O error occurs.
   */
  public int writeDelta(int x) throws IOException {
    final int msb = BitUtilities.mostSignificantBit(++x);
    final int l = writeGamma(msb);
    return l + (msb != 0 ? writeInt(x, msb) : 0);
  }

  /**
   * Writes the low {@code len} bits of an integer x. First, it checks if they fit into the word
   * currently being filled; otherwise the bits are split between that word and the next one.
   * Lengths above 32 are not supported.
   * @param x the int to write
   * @param len length of the int in bits
   * @return the number of bits written
   * @throws IOException if an I/O error occurs.
   */
  public int writeInt(int x, final int len) throws IOException {
    if (bitOffset >= len) {
      return writeInCurrent(x, len);
    }
    // number of bits to be written in the next word
    final int queue = (len - bitOffset) & 31;
    // High part fills the current word (which gets flushed), low part starts the next one.
    writeInCurrent(x >>> queue, bitOffset);
    writeInCurrent(x, queue);
    return len;
  }

  /**
   * Does nothing.
   * @deprecated this method has no effect; data reaches the underlying stream when the internal
   * buffer fills up, or through {@link #padAndFlush()} and {@link #close()}.
   */
  @Deprecated
  public void flush() {}

  /**
   * Closes the BitOutputStream. It first writes the pending bits and the buffer, followed by one
   * extra zero byte, and then closes the underlying stream. The underlying stream is closed even
   * if one of those writes fails.
   * @throws IOException if an I/O error occurs when writing to or closing the underlying OutputStream
   */
  public void close() throws IOException {
    try {
      flushPending();
      dos.write(0);
    } finally {
      dos.close();
    }
  }

  /**
   * Writes an integer x into the stream using skewed-golomb coding.
   * With {@code i = BitUtilities.mostSignificantBit(x / b + 1)}, the method writes
   * {@code i + 1} in unary and then, with {@code M = (2^(i+1) - 1) * b} and
   * {@code m = (M / (2 * b)) * b} (integer division), writes {@code x - m} in minimal binary
   * coding with bound {@code M - m}.
   * <br>
   * This method is not failsafe, it doesn't check if the argument or the modulus is 0 or negative.
   * @param x the number to write
   * @param b the parameter for golomb coding
   * @return the number of bits written
   * @throws IOException if an I/O error occurs
   */
  public int writeSkewedGolomb(final int x, final int b) throws IOException {
    final int i = BitUtilities.mostSignificantBit(x / b + 1);
    final int l = writeUnary(i + 1);
    final int M = ((1 << (i + 1)) - 1) * b;
    final int m = (M / (2 * b)) * b;
    return l + writeMinimalBinary(x - m, M - m);
  }

  /**
   * Writes a sequence of integers using interpolative coding. The data must be sorted (increasing order).
   * The middle element is written in minimal binary coding relative to the bounds, and the two
   * halves are then coded recursively with the middle element as their new bound.
   * <p>
   * NOTE(review): the bounds passed to {@link #writeMinimalBinary(int, int)} are {@code hi - lo}
   * style differences, so an element equal to its upper bound yields a value equal to the bound,
   * which that method documents as strict. Confirm against the matching reader before changing.
   *
   * @param data the vector containing the integer sequence.
   * @param offset the offset into <code>data</code> where the sequence starts.
   * @param len the number of integers to code.
   * @param lo a lower bound (must be smaller than or equal to the first integer in the sequence).
   * @param hi an upper bound (must be greater than or equal to the last integer in the sequence).
   * @return the number of written bits.
   * @throws IOException if an I/O error occurs.
   */
  public int writeInterpolativeCode(int data[], int offset, int len, int lo, int hi) throws IOException {
    if (len == 0) {
      return 0;
    }
    if (len == 1) {
      return writeMinimalBinary(data[offset] - lo, hi - lo);
    }
    final int h = len / 2;
    final int m = data[offset + h];
    int l = writeMinimalBinary(m - (lo + h), hi - len + h + 1 - (lo + h));
    l += writeInterpolativeCode(data, offset, h, lo, m - 1);
    return l + writeInterpolativeCode(data, offset + h + 1, len - h - 1, m + 1, hi);
  }

  /**
   * Writes an integer x into the stream using golomb coding: the quotient
   * {@code q = (x - 1) / b} is written as {@code q + 1} in unary, followed by the remainder
   * {@code x - q * b - 1} in minimal binary coding with bound b.
   * This method is not failsafe, it doesn't check if the argument or the modulus is 0 or negative.
   * @param x the number to write
   * @param b the parameter for golomb coding
   * @return the number of bits written
   * @throws IOException if an I/O error occurs
   */
  public int writeGolomb(final int x, final int b) throws IOException {
    final int q = (x - 1) / b;
    final int l = writeUnary(q + 1);
    return l + writeMinimalBinary(x - q * b - 1, b);
  }

  /**
   * Writes an integer x using minimal binary encoding, given an upper bound.
   * This method is not failsafe, it doesn't check if the argument is 0 or negative.
   * @param x the number to write
   * @param b a strict bound for <code>x</code>
   * @return the number of bits written
   * @throws IOException if an I/O error occurs.
   */
  public int writeMinimalBinary(final int x, final int b) throws IOException {
    final int log2b = BitUtilities.mostSignificantBit(b);
    // Numbers smaller than m are encoded in log2b bits, the others in log2b + 1 bits.
    final int m = (1 << (log2b + 1)) - b;
    if (x < m) {
      return writeInt(x, log2b);
    }
    return writeInt(m + x, log2b + 1);
  }

  /**
   * Copies into the buffer only those bytes of a partially filled word that contain written
   * bits, and accounts for them in the byte offset.
   * Used when flushing, to avoid unnecessary byte writes.
   * @param writeMe int to write
   * @param freeBits number of bits still free in the int (32 means nothing was written)
   */
  private void writeIntBufferToBit(int writeMe, int freeBits) {
    final int start = bufferPointer;
    if (freeBits < 32) {
      buffer[bufferPointer++] = (byte) (writeMe >>> 24);
    }
    if (freeBits < 24) {
      buffer[bufferPointer++] = (byte) (writeMe >>> 16);
    }
    if (freeBits < 16) {
      buffer[bufferPointer++] = (byte) (writeMe >>> 8);
    }
    if (freeBits < 8) {
      buffer[bufferPointer++] = (byte) writeMe;
    }
    // Count exactly the bytes emitted; this may be anything from 0 to 4.
    unalignedBytes += bufferPointer - start;
  }

  /**
   * Pads the word being filled to a byte boundary, writes it and the buffer to the underlying
   * stream, and resets the word and the buffer to their empty state.
   * @throws IOException if an I/O error occurs.
   */
  private void flushPending() throws IOException {
    writeIntBufferToBit(byteToWrite, bitOffset);
    dos.write(buffer, 0, bufferPointer);
    byteToWrite = 0;
    bufferPointer = 0;
    bitOffset = 32;
  }

  /**
   * Appends a byte array to the current stream.
   * Flushes the current int, the buffer and then writes the new sequence of bytes.
   * @param toAppend byte[] it is going to be written to the stream.
   * @param len length in bytes of the byte buffer (number of elements of the array).
   * @throws IOException if an I/O exception occurs.
   */
  public void append(byte[] toAppend, int len) throws IOException {
    flushPending();
    dos.write(toAppend, 0, len);
    unalignedBytes += len;
  }

  /**
   * Appends a byte array to the current stream, where the last byte is not fully written.
   * Flushes the current int, the buffer and then writes the new sequence of bytes; the partially
   * written byte becomes the start of the word being filled, so later writes continue inside it.
   * @param toAppend byte[] it is going to be written to the stream.
   * @param len length in bytes of the byte buffer (number of elements of the array).
   * @param newByte last byte (the one not fully written)
   * @param bitswritten number of bits written in the last byte
   * @throws IOException if an I/O exception occurs.
   */
  public void append(byte[] toAppend, int len, byte newByte, int bitswritten) throws IOException {
    flushPending();
    dos.write(toAppend, 0, len);
    unalignedBytes += len;
    byteToWrite = ((int) newByte) << 24;
    bitOffset = 32 - bitswritten;
  }

  /**
   * Pads the current byte and writes the current int into the buffer.
   * Then, it flushes the buffer to the underlying OutputStream.
   * @throws IOException if an I/O error occurs.
   */
  public void padAndFlush() throws IOException {
    flushPending();
  }

  /**
   * Writes an integer in binary format to the stream.
   * @param len size in bits of the number.
   * @param x the integer to write.
   * @return the number of bits written.
   * @throws IOException if an I/O error occurs.
   */
  public int writeBinary(int len, int x) throws IOException {
    return writeInt(x, len);
  }
}