/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Dec 17, 2006
*/
package com.bigdata.io.compression;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import org.apache.log4j.Logger;
import com.bigdata.btree.IndexSegment;
import com.bigdata.io.ByteBufferInputStream;
import com.bigdata.io.ByteBufferOutputStream;
/**
* Bulk data (de-)compressor used for leaves in {@link IndexSegment}s. The
* compression and decompression operations of a given {@link RecordCompressor}
* reuse a shared instance buffer. Any decompression result is valid only until
* the next compression or decompression operation performed by that
* {@link RecordCompressor}. When used in a single-threaded context this reduces
* allocation while maximizing the opportunity for bulk transfers.
* <p>
* This class is NOT thread-safe.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*/
public class RecordCompressor implements Externalizable, IRecordCompressor {

    /*
     * Bug fix: the logger was bound to CompressorRegistry.class (copy/paste
     * error), which mis-attributed this class's log output.
     */
    protected static final Logger log = Logger.getLogger(RecordCompressor.class);

    private static final long serialVersionUID = -2028159717578047153L;

    /**
     * A huge portion of the cost associated with using {@link Deflater} is the
     * initialization of a new instance. Since this code is designed to operate
     * within a single-threaded environment, we just reuse the same instance
     * for each invocation. Assigned by the ctor and by {@link #readExternal}
     * (it is transient, so it must be re-created on de-serialization).
     */
    private transient Deflater _deflater;

    /**
     * Shared {@link Inflater} instance — {@link Inflater#reset()} is invoked
     * before each decompression request.
     */
    final private transient Inflater _inflater = new Inflater();

    /**
     * Reused on each (de-)compression request and reallocated if the buffer
     * size would be exceeded. This will achieve a steady state sufficient to
     * handle any given input in a single pass.
     */
    private transient byte[] _buf = new byte[1024];

    /**
     * The compression level specified to the ctor.
     */
    private int level;

    @Override
    public String toString() {

        return getClass().getName() + "{level=" + level + "}";

    }

    /**
     * Create a record compressor.
     *
     * @param level
     *            The compression level.
     *
     * @see Deflater#BEST_SPEED
     * @see Deflater#BEST_COMPRESSION
     */
    public RecordCompressor(final int level) {

        _deflater = new Deflater(level);

        this.level = level;

    }

    /**
     * De-serialization constructor.
     */
    public RecordCompressor() {
    }

    public void compress(final ByteBuffer bin, final ByteBuffer out) {

        compress(bin, new ByteBufferOutputStream(out));

    }

    public ByteBuffer compress(final ByteBuffer bin) {

        final ByteArrayOutputStream out = new ByteArrayOutputStream();

        // Note: capture the input size before compress() advances position().
        final int nbytesIn = bin.limit();

        compress(bin, out);

        if (log.isTraceEnabled())
            log.trace("Record compression from " + nbytesIn + " to "
                    + out.size());

        return ByteBuffer.wrap(out.toByteArray());

    }

    /**
     * Compress the data between position() and limit() of the source buffer
     * onto the output stream. As a side effect the position() of the source
     * buffer is advanced to its limit().
     *
     * @param bin
     *            The source buffer.
     * @param os
     *            The sink onto which the compressed data are written.
     */
    public void compress(final ByteBuffer bin, final OutputStream os) {

        if (bin.hasArray()) {

            /*
             * The source buffer is backed by an accessible array, so we
             * delegate directly against that array without copying the data.
             *
             * Bug fix: the offset into the backing array must include
             * arrayOffset(). The previous code used position() alone (guarded
             * by position()==0 && limit()==capacity()), which read the wrong
             * bytes for heap buffers produced by slice(), where
             * arrayOffset() > 0 while position()==0 and limit()==capacity().
             *
             * Note: hasArray() is false for read-only buffers, so array()
             * can not throw ReadOnlyBufferException on this path.
             */
            final int off = bin.arrayOffset() + bin.position();

            final int len = bin.remaining();

            compress(bin.array(), off, len, os);

            // Advance the position to the limit (the copy path below has the
            // same side effect via ByteBuffer#get).
            bin.position(bin.limit());

        } else {

            /*
             * Figure out how much data needs to be written.
             */
            final int size = bin.remaining();

            /*
             * If the shared buffer is not large enough then reallocate it as a
             * sufficiently large buffer.
             */
            if (_buf.length < size) {

                _buf = new byte[size];

            }

            /*
             * Copy the data from the ByteBuffer into the shared instance
             * buffer (this advances the position to the limit).
             */
            bin.get(_buf, 0, size);

            /*
             * Compress the data onto the output stream.
             */
            compress(_buf, 0, size, os);

        }

    }

    /**
     * Compress the entire byte[] onto the output stream.
     */
    public void compress(final byte[] bytes, final OutputStream os) {

        compress(bytes, 0, bytes.length, os);

    }

    /**
     * Compress <i>len</i> bytes starting at <i>off</i> onto the output stream.
     *
     * @param bytes
     *            The source data.
     * @param off
     *            The offset of the first byte to compress.
     * @param len
     *            The #of bytes to compress.
     * @param os
     *            The sink onto which the compressed data are written.
     */
    public void compress(final byte[] bytes, final int off, final int len,
            final OutputStream os) {

        _deflater.reset(); // required w/ instance reuse.

        final DeflaterOutputStream dos = new DeflaterOutputStream(os, _deflater);

        try {

            /*
             * Write onto the deflater stream, which writes onto the output
             * stream.
             */
            dos.write(bytes, off, len);

            /*
             * Flush and close the deflater stream.
             *
             * Note: The caller is unable to do this as they do not have access
             * to the {@link Deflater}. However, if this flushes through to the
             * underlying sink then that could drive IOs without the
             * application being aware that synchronous IO was occurring.
             */
            dos.flush();

            dos.close();

        } catch (IOException ex) {

            throw new RuntimeException(ex);

        }

    }

    public ByteBuffer decompress(final ByteBuffer bin) {

        _inflater.reset(); // reset required by reuse.

        final int size = bin.limit();

        final InflaterInputStream iis = new InflaterInputStream(
                new ByteBufferInputStream(bin), _inflater, size);

        return decompress(iis);

    }

    public ByteBuffer decompress(final byte[] bin) {

        _inflater.reset(); // reset required by reuse.

        final int size = bin.length;

        final InflaterInputStream iis = new InflaterInputStream(
                new ByteArrayInputStream(bin), _inflater, size);

        return decompress(iis);

    }

    /**
     * This decompresses data into a shared instance byte[]. If the byte[] runs
     * out of capacity then a new byte[] is allocated with twice the capacity,
     * the data is copied into new byte[], and decompression continues. The
     * shared instance byte[] is then returned to the caller. This approach is
     * suited to single-threaded processes that achieve a suitable buffer size
     * and then perform zero allocations thereafter.
     *
     * @return A read-only view onto a shared buffer. The data between
     *         position() and limit() are the decompressed data. The contents
     *         of this buffer are valid only until the next compression or
     *         decompression request. The position will be zero. The limit will
     *         be the #of decompressed bytes.
     */
    protected ByteBuffer decompress(final InflaterInputStream iis) {

        int off = 0;

        try {

            while (true) { // use bulk I/O.

                int capacity = _buf.length - off;

                if (capacity == 0) {

                    // Shared buffer is full: double it, preserving the data
                    // already decompressed.
                    final byte[] tmp = new byte[_buf.length * 2];

                    System.arraycopy(_buf, 0, tmp, 0, off);

                    _buf = tmp;

                    capacity = _buf.length - off;

                }

                final int nread = iis.read(_buf, off, capacity);

                if (nread == -1)
                    break; // EOF.

                off += nread;

            }

        } catch (IOException ex) {

            throw new RuntimeException(ex);

        }

        // Read-only view onto the shared buffer — valid only until the next
        // (de-)compression request (see javadoc above).
        return ByteBuffer.wrap(_buf, 0, off).asReadOnlyBuffer();

    }

    public void readExternal(final ObjectInput in) throws IOException,
            ClassNotFoundException {

        level = in.readInt();

        // Re-create the transient Deflater for the de-serialized level.
        _deflater = new Deflater(level);

    }

    public void writeExternal(final ObjectOutput out) throws IOException {

        // Note: only the compression level is part of the serialized state.
        out.writeInt(level);

    }

}