package org.apache.hadoop.io.simpleseekableformat;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.zip.CRC32;
import org.apache.hadoop.io.compress.CompressionCodec;
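/**
* Holds everything needed to write a single data segment: the stored bytes
* (compressed only when that makes the segment smaller), the name of the
* codec that produced them (empty string when uncompressed), and the CRC32
* of the stored bytes.
*/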
class DataSegmentWriter {

  /**
   * Cache from codec class name to its UTF-8 encoding, so the name is not
   * re-encoded for every segment. The cached arrays are shared between all
   * callers and must be treated as read-only.
   */
  static final ConcurrentHashMap<String, byte[]> CODEC_NAME_CACHE =
      new ConcurrentHashMap<String, byte[]>();

  /** Class name of the codec used for {@link #storedData}; empty string for no compression. */
  private final String codecName;

  /** UTF-8 encoding of {@link #codecName}, exactly as written to the stream. */
  private final byte[] codecNameUTF8;

  /** Either the caller's uncompressed data or the compressed form of it. */
  private final SimpleSeekableFormat.Buffer storedData;

  /** CRC32 of the bytes in {@link #storedData} (i.e. of the bytes actually written). */
  private final long crc32Value;

  /**
   * Create a new data segment from uncompressed data and a codec.
   * This is called by the writer.
   *
   * <p>When a codec is given, the data is compressed, but the compressed form
   * is kept only if it is strictly smaller than the uncompressed data once
   * the bytes needed to record the codec name are counted; otherwise the
   * segment is stored uncompressed with an empty codec name.
   *
   * @param uncompressedData the raw segment payload
   * @param codec the codec to try, or {@code null} to store the data uncompressed
   * @throws IOException if compressing the data fails
   */
  DataSegmentWriter(SimpleSeekableFormat.Buffer uncompressedData, CompressionCodec codec)
      throws IOException {
    // Start from "no compression" and switch only if compression pays off.
    String chosenName = "";
    SimpleSeekableFormat.Buffer chosenData = uncompressedData;

    if (codec != null) {
      SimpleSeekableFormat.Buffer compressedData = compress(uncompressedData, codec);
      String candidateName = codec.getClass().getName();
      // Compare using the UTF-8 byte length of the name, because that is what
      // writeTo() actually emits; String.length() counts UTF-16 chars instead.
      int nameOverhead = getCodecNameUTF8(candidateName).length;
      if (compressedData.getLength() + nameOverhead < uncompressedData.getLength()) {
        chosenName = candidateName;
        chosenData = compressedData;
      }
    }

    codecName = chosenName;
    codecNameUTF8 = getCodecNameUTF8(chosenName);
    storedData = chosenData;

    // The checksum covers the stored (possibly compressed) bytes.
    CRC32 crc32 = new CRC32();
    crc32.update(storedData.getData(), 0, storedData.getLength());
    crc32Value = crc32.getValue();
  }

  /**
   * Compress {@code input} with {@code codec} into a fresh buffer, making sure
   * the compression stream is closed even when writing fails.
   */
  private static SimpleSeekableFormat.Buffer compress(
      SimpleSeekableFormat.Buffer input, CompressionCodec codec) throws IOException {
    SimpleSeekableFormat.Buffer compressed = new SimpleSeekableFormat.Buffer();
    OutputStream out = codec.createOutputStream(compressed);
    boolean finished = false;
    try {
      out.write(input.getData(), 0, input.getLength());
      // Closing completes the compressed stream; a failure here must propagate.
      out.close();
      finished = true;
    } finally {
      if (!finished) {
        try {
          out.close();
        } catch (IOException ignored) {
          // Best-effort cleanup: keep the original failure as the one reported.
        }
      }
    }
    return compressed;
  }

  /**
   * Write this data segment into an OutputStream.
   *
   * <p>Layout, in order:
   * <ol>
   *   <li>int: number of bytes that follow this field</li>
   *   <li>short: length in bytes of the UTF-8 codec name</li>
   *   <li>the UTF-8 codec name (empty for no compression)</li>
   *   <li>long: CRC32 of the stored data</li>
   *   <li>the stored data</li>
   * </ol>
   *
   * @param out the stream to write the segment to
   * @throws IOException if writing to {@code out} fails
   */
  void writeTo(DataOutputStream out) throws IOException {
    // We do the UTF8 conversion ourselves instead of relying on DataOutput
    // to ensure we strictly follow the UTF-8 standard, to get better
    // performance, and to avoid separate code for counting the UTF-8 bytes
    // (we need that count to calculate the total length).
    int dataLength = storedData.getLength();
    int length = 8 /*crc32*/
        + 2 /*utf8 length*/ + codecNameUTF8.length
        + dataLength;

    out.writeInt(length);
    out.writeShort(codecNameUTF8.length);
    out.write(codecNameUTF8);
    out.writeLong(crc32Value);
    out.write(storedData.getData(), 0, dataLength);
  }

  /**
   * Convert from String to UTF8 byte array, caching the result in
   * {@link #CODEC_NAME_CACHE}.
   *
   * @param compressionCodecName the codec class name (empty string for no compression)
   * @return the UTF-8 bytes of the name; the array is shared via the cache and
   *         must not be modified by the caller
   */
  static byte[] getCodecNameUTF8(String compressionCodecName) {
    byte[] cached = CODEC_NAME_CACHE.get(compressionCodecName);
    if (cached != null) {
      return cached;
    }

    byte[] encoded;
    try {
      encoded = compressionCodecName.getBytes("UTF-8");
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);
    }

    // If another thread cached the name first, hand out its array so that
    // every caller sees the same instance.
    byte[] raced = CODEC_NAME_CACHE.putIfAbsent(compressionCodecName, encoded);
    return raced != null ? raced : encoded;
  }
}