package org.apache.hadoop.io.simpleseekableformat;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
/**
* SimpleSeekableFormat supports seek based on compressed byte offsets as well
* as uncompressed byte offsets.
*
* File Format Description:
* 1. Metadata blocks and data blocks
* The first 1K bytes of each 1 MB of the file form a metadata block.
* The remaining 1023K bytes of that 1 MB form a data block.
*
* 2. Metadata block (1024 bytes):
* Each metadata block looks like this:
* 32 bytes: "SSF_Magic_C17e5C697a00bB1A859aD\n"
* 4 bytes: version number, now is 1.
* 16 bytes: 8-byte of uncompressed data stream offset
* + 8-byte of compressed data stream offset
*
* 3. Data block (1023 * 1024 bytes):
* All data blocks should be concatenated to form a stream. The stream
* consists of consecutive data segments, back to back.
*
* 4. Data segment:
* Each data segment looks like this:
* 4 bytes: length (implies that a single data segment cannot be longer than
* 4GB). It does not include the length field itself, but includes
* all following fields like codec name and crc32 checksum.
* 2 bytes: byte length of compression codec class name.
* x-bytes: UTF-8 encoded compression codec class name.
* 8 bytes: crc32 checksum of the data following.
* length - 8 - 2 - x bytes: actual data
*
* This class encapsulates all the underlying logic of the SeekableFileFormat.
*
* NOTE: Requirement on the CompressionCodec InputStream: available() should
* only return 0 when EOF. Otherwise SeekableFileInputStream.available() will
* break.
*/
class SimpleSeekableFormat {

  // Key strings. Judging by their names these are configuration keys for the
  // compression codec and the maximum uncompressed segment length; the code
  // that reads them is not part of this class.
  public static final String FILEFORMAT_SSF_CODEC_CONF = "fileformat.ssf.codec";
  public static final String FILEFORMAT_SSF_MAX_UNCOMPRESSED_SEGMENT_LENGTH =
      "fileformat.ssf.max.uncompressed.segment.length";

  /** Size in bytes of one metadata block. */
  static final int METADATA_BLOCK_LENGTH = 1024;

  /** Size in bytes of one data block: 1 MB minus one metadata block. */
  static final int DATA_BLOCK_LENGTH = 1024 * 1024 - METADATA_BLOCK_LENGTH;

  /** Format version number. */
  static final int VERSION = 1;

  /** Magic string: 32 characters, the last one being a newline. */
  static final String MAGIC_HEADER = "SSF_Magic_C17e5C697a00bB1A859aD\n";

  /**
   * UTF-8 encoding of {@link #MAGIC_HEADER}.
   *
   * Encoded through a {@link Charset} rather than a charset name, so no
   * checked UnsupportedEncodingException has to be handled for an encoding
   * that every Java platform is required to support.
   */
  static final byte[] MAGIC_HEADER_BYTES =
      MAGIC_HEADER.getBytes(Charset.forName("UTF-8"));

  /**
   * A pair of stream offsets: one into the uncompressed data stream and one
   * into the compressed data stream. Serialized as two consecutive 8-byte
   * longs, uncompressed offset first.
   */
  static class OffsetPair {
    long uncompressedOffset;
    long compressedOffset;

    /**
     * Reads both offsets from the stream, uncompressed offset first.
     *
     * @param in stream to read two longs from
     * @throws IOException if reading from the stream fails
     */
    void readFrom(DataInputStream in) throws IOException {
      uncompressedOffset = in.readLong();
      compressedOffset = in.readLong();
    }

    /**
     * Writes both offsets to the stream, uncompressed offset first.
     *
     * @param out stream to write two longs to
     * @throws IOException if writing to the stream fails
     */
    void writeTo(DataOutputStream out) throws IOException {
      out.writeLong(uncompressedOffset);
      out.writeLong(compressedOffset);
    }
  }

  /**
   * A ByteArrayOutputStream that exposes its internal array and length, so
   * the written bytes can be read without the copy made by toByteArray().
   */
  static class Buffer extends ByteArrayOutputStream {

    /**
     * Returns the internal backing array itself, not a copy. Only the first
     * {@link #getLength()} bytes are valid; the array may be longer than that.
     */
    public byte[] getData() {
      return buf;
    }

    /** Returns the number of valid bytes currently held in the buffer. */
    public int getLength() {
      return count;
    }

    /**
     * Discards the buffered bytes by resetting the length to zero.
     *
     * Declared synchronized like the method it overrides, so it keeps the
     * same locking as the inherited write methods.
     */
    @Override
    public synchronized void reset() {
      super.reset();
    }
  }
}