package org.apache.hadoop.io.simpleseekableformat;

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.simpleseekableformat.SimpleSeekableFormat.OffsetPair;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Writes data in the Simple Seekable Format.
 * Data from a single write will be in a single data segment.
 *
 * See {@link SimpleSeekableFormat}
 */
public class SimpleSeekableFormatOutputStream extends CompressionOutputStream
    implements Configurable {

  /**
   * This is a hint.  The actual maximum can go beyond this number if a lot of
   * data is sent via a single write, since a single write will always be in
   * the same data segment.
   */
  private static final int DEFAULT_MAX_UNCOMPRESSED_SEGMENT_LENGTH = 1024 * 1024;

  /**
   * dataSegmentOut is a wrapper stream that automatically inserts MetaDataBlocks
   * while writing out data segments.
   */
  final InterleavedOutputStream dataSegmentOut;
  /**
   * dataSegmentDataOut is a DataOutputStream wrapping dataSegmentOut.
   */
  private final DataOutputStream dataSegmentDataOut;

  private Configuration conf;
  private Class<? extends CompressionCodec> codecClass;
  private CompressionCodec codec;
  private int maxUncompressedSegmentLength;

  private OffsetPair lastOffsets = new OffsetPair();
  private final SimpleSeekableFormat.Buffer currentDataSegmentBuffer =
      new SimpleSeekableFormat.Buffer();

  public SimpleSeekableFormatOutputStream(OutputStream out) {
    this(new DataOutputStream(out));
  }

  /**
   * DataOutputStream allows easy writes of integers, strings, etc.
   */
  protected SimpleSeekableFormatOutputStream(DataOutputStream out) {
    // We don't use the inherited field "out" at all.
    super(null);
    this.dataSegmentOut = new InterleavedOutputStream(out,
        SimpleSeekableFormat.METADATA_BLOCK_LENGTH,
        SimpleSeekableFormat.DATA_BLOCK_LENGTH,
        new MetaDataProducer());
    this.dataSegmentDataOut = new DataOutputStream(dataSegmentOut);
  }

  private static final byte[] NULLS = new byte[1024];
  static {
    Arrays.fill(NULLS, (byte) 0);
  }
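  /*
   * On-disk layout of one metadata block, as produced by writeMetaData() below
   * (a sketch inferred from that code; field widths follow DataOutputStream):
   *
   *   MAGIC_HEADER_BYTES               (SimpleSeekableFormat.MAGIC_HEADER_BYTES.length bytes)
   *   VERSION                          (4-byte int)
   *   lastOffsets.uncompressedOffset   (8-byte long)
   *   lastOffsets.compressedOffset     (8-byte long)
   *   zero padding                     (remainder of the metaDataBlockSize bytes)
   */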
  /**
   * This inner class provides the metadata block.
   * Note that it accesses the lastOffsets field of the enclosing stream.
   */
  class MetaDataProducer implements InterleavedOutputStream.MetaDataProducer {

    /**
     * @param out                The raw output stream.
     * @param metaDataBlockSize  The total size of the metadata block to write.
     */
    @Override
    public void writeMetaData(DataOutputStream out, int metaDataBlockSize)
        throws IOException {
      // Magic header and version
      out.write(SimpleSeekableFormat.MAGIC_HEADER_BYTES);
      out.writeInt(SimpleSeekableFormat.VERSION);
      // Write out the offset pair
      out.writeLong(lastOffsets.uncompressedOffset);
      out.writeLong(lastOffsets.compressedOffset);
      // Fill up the rest of the block with zero bytes
      int left = metaDataBlockSize
          - SimpleSeekableFormat.MAGIC_HEADER_BYTES.length - 4 - 8 - 8;
      while (left > 0) {
        int toWrite = Math.min(left, NULLS.length);
        out.write(NULLS, 0, toWrite);
        left -= toWrite;
      }
    }
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
    // Set the codec
    codecClass = conf.getClass(SimpleSeekableFormat.FILEFORMAT_SSF_CODEC_CONF,
        null, CompressionCodec.class);
    if (codecClass == null) {
      codec = null;
    } else {
      codec = ReflectionUtils.newInstance(codecClass, conf);
    }
    // Set the max segment length
    maxUncompressedSegmentLength = conf.getInt(
        SimpleSeekableFormat.FILEFORMAT_SSF_MAX_UNCOMPRESSED_SEGMENT_LENGTH,
        DEFAULT_MAX_UNCOMPRESSED_SEGMENT_LENGTH);
  }

  @Override
  public void write(int b) throws IOException {
    currentDataSegmentBuffer.write(b);
    flushIfNeeded();
  }

  /**
   * This method makes sure the whole buffer is written into the same data segment.
   */
  @Override
  public void write(byte[] b, int start, int length) throws IOException {
    currentDataSegmentBuffer.write(b, start, length);
    flushIfNeeded();
  }

  @Override
  public void close() throws IOException {
    flush();
    dataSegmentDataOut.close();
  }

  private void flushIfNeeded() throws IOException {
    if (currentDataSegmentBuffer.size() >= maxUncompressedSegmentLength) {
      flush();
    }
  }

  /**
   * Takes the current data segment, optionally compresses it,
   * calculates the CRC32, and then writes it out.
   */
  @Override
  public void flush() throws IOException {
    DataSegmentWriter currentDataSegment =
        new DataSegmentWriter(currentDataSegmentBuffer, codec);
    currentDataSegment.writeTo(dataSegmentDataOut);
    // Update the latest offsets before clearing the buffer, so that the
    // uncompressed offset actually advances by the size of this segment.
    lastOffsets.uncompressedOffset += currentDataSegmentBuffer.size();
    lastOffsets.compressedOffset = dataSegmentOut.getDataOffset();
    // Clear out the current buffer
    currentDataSegmentBuffer.reset();
  }

  @Override
  public void finish() throws IOException {
    throw new RuntimeException(
        "SimpleSeekableFormatOutputStream does not support finish()");
  }

  @Override
  public void resetState() throws IOException {
    throw new RuntimeException(
        "SimpleSeekableFormatOutputStream does not support resetState()");
  }

}
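/**
 * A minimal usage sketch for {@link SimpleSeekableFormatOutputStream}.  This
 * example class is illustrative only: the GzipCodec choice and the output path
 * are assumptions, not requirements of the format.  It shows the expected call
 * order: construct, setConf (so the codec and segment-length settings take
 * effect), write, close.
 */
class SimpleSeekableFormatOutputStreamExample {

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Any CompressionCodec works here; GzipCodec is just an example choice.
    conf.setClass(SimpleSeekableFormat.FILEFORMAT_SSF_CODEC_CONF,
        org.apache.hadoop.io.compress.GzipCodec.class, CompressionCodec.class);

    // Example output path; pass a real path as the first argument if desired.
    OutputStream rawOut = new java.io.FileOutputStream(
        args.length > 0 ? args[0] : "example.ssf");

    SimpleSeekableFormatOutputStream out = new SimpleSeekableFormatOutputStream(rawOut);
    out.setConf(conf);

    // Each write() call is guaranteed to stay within a single data segment.
    out.write("hello, seekable world\n".getBytes("UTF-8"));
    out.close();
  }
}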