package org.apache.hadoop.io.simpleseekableformat;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

/**
 * This InputStream removes the metadata from the underlying stream.
 *
 * <p>The underlying stream is consumed as a repeating sequence of one metadata
 * block ({@code metaDataBlockSize} bytes) followed by one data block
 * ({@code dataBlockSize} bytes), starting with a metadata block. Only the data
 * bytes are returned to the caller; every metadata block is handed to a
 * {@link MetaDataConsumer}.
 */
class InterleavedInputStream extends InputStream {

  public interface MetaDataConsumer {
    /**
     * This function should read a metadata block with size metaDataBlockSize.
     * This function should throw EOFException if there are not enough bytes
     * in the InputStream.
     *
     * @param in The raw input stream
     * @param metaDataBlockSize Number of bytes in the metadata block
     * @throws EOFException if the stream ends before the block is complete
     * @throws IOException on any other read failure
     */
    void readMetaData(InputStream in, int metaDataBlockSize) throws IOException;
  }

  /**
   * A {@link MetaDataConsumer} that discards the metadata block.
   */
  public static class DefaultMetaDataConsumer implements MetaDataConsumer {
    @Override
    public void readMetaData(InputStream in, int metaDataBlockSize)
        throws IOException {
      long toSkip = metaDataBlockSize;
      while (toSkip > 0) {
        long skipped = in.skip(toSkip);
        if (skipped <= 0) {
          // skip() is allowed to return 0 without the stream being at EOF,
          // so confirm EOF by trying to read a single byte.
          if (in.read() < 0) {
            throw new EOFException("Incomplete metadata section. Should be "
                + metaDataBlockSize + " bytes but got only "
                + (metaDataBlockSize - toSkip)
                + " bytes and then skip returns " + skipped);
          }
          // The probe consumed one byte of the metadata block.
          skipped = 1;
        }
        toSkip -= skipped;
      }
    }
  }

  private final InputStream in;
  private final int metaDataBlockSize;
  private final int dataBlockSize;
  private final MetaDataConsumer metaDataConsumer;

  /** Number of metadata blocks that were consumed completely. */
  private long completeMetaDataBlocks;
  /** Number of data bytes already read from the current data block. */
  private int currentDataBlockSize;
  /** Set once the underlying stream has signalled EOF. */
  private boolean eofReached;

  /**
   * @param in The code assumes that (in.available() == 0) means EOF.
   *           The code will break if that requirement is not met by
   *           the InputStream in. Note that most InputStreams like
   *           {@link java.util.zip.InflaterInputStream#available()}
   *           support this assumption.
   * @param metaDataBlockSize Size in bytes of each metadata block
   * @param dataBlockSize Size in bytes of each data block
   * @param metaDataConsumer Receives every metadata block
   */
  InterleavedInputStream(InputStream in, int metaDataBlockSize,
      int dataBlockSize, MetaDataConsumer metaDataConsumer) {
    this.in = in;
    this.metaDataBlockSize = metaDataBlockSize;
    this.dataBlockSize = dataBlockSize;
    this.metaDataConsumer = metaDataConsumer;
    // Signal that we need to read metadata block first: a "full" current
    // data block makes the next read consume a metadata block.
    currentDataBlockSize = dataBlockSize;
    eofReached = false;
  }

  /**
   * Creates a stream that discards metadata blocks using
   * {@link DefaultMetaDataConsumer}.
   *
   * @param in in.available() should return > 0 unless EOF
   * @param metaDataBlockSize Size in bytes of each metadata block
   * @param dataBlockSize Size in bytes of each data block
   */
  InterleavedInputStream(InputStream in, int metaDataBlockSize,
      int dataBlockSize) {
    this(in, metaDataBlockSize, dataBlockSize, new DefaultMetaDataConsumer());
  }

  /**
   * Number of bytes read from the underlying stream, counting complete
   * metadata blocks and data bytes.
   */
  public long getOffset() {
    return completeMetaDataBlocks * metaDataBlockSize + getDataOffset();
  }

  /**
   * Number of data bytes read from the underlying stream.
   */
  public long getDataOffset() {
    // Before the first metadata block is consumed, completeMetaDataBlocks is 0
    // and currentDataBlockSize equals dataBlockSize, so this evaluates to 0.
    return (completeMetaDataBlocks - 1) * dataBlockSize + currentDataBlockSize;
  }

  /**
   * Consumes the next metadata block if the current data block is exhausted.
   *
   * @return true if data may be read from the underlying stream;
   *         false if EOF has been reached
   */
  private boolean readMetaDataIfNeeded() throws IOException {
    if (eofReached) {
      return false;
    }
    if (currentDataBlockSize == dataBlockSize) {
      try {
        metaDataConsumer.readMetaData(in, metaDataBlockSize);
        completeMetaDataBlocks++;
        currentDataBlockSize = 0;
      } catch (EOFException e) {
        // The consumer reports a missing or truncated metadata block with
        // EOFException; that is the regular end of this stream.
        eofReached = true;
        return false;
      }
    }
    return true;
  }

  @Override
  public int read() throws IOException {
    if (!readMetaDataIfNeeded()) {
      return -1;
    }
    int result = in.read();
    if (result >= 0) {
      // don't do this if read() returns -1, which means EOF.
      currentDataBlockSize++;
    } else {
      eofReached = true;
    }
    return result;
  }

  /**
   * Reads data bytes into the buffer. A single call never crosses a data
   * block boundary, so it may return fewer bytes than requested.
   *
   * @return number of bytes read, 0 if length is 0, or -1 on EOF
   */
  @Override
  public int read(byte[] b, int start, int length) throws IOException {
    if (length == 0) {
      // InputStream contract: a zero-length read consumes nothing and
      // returns 0.
      return 0;
    }
    if (!readMetaDataIfNeeded()) {
      return -1;
    }
    int toRead = Math.min(length, dataBlockSize - currentDataBlockSize);
    int read = in.read(b, start, toRead);
    if (read >= 0) {
      currentDataBlockSize += read;
    } else {
      eofReached = true;
    }
    return read;
  }

  @Override
  public void close() throws IOException {
    in.close();
  }

  /**
   * Estimates the number of data bytes available, derived from the
   * underlying stream's available() with metadata blocks subtracted.
   */
  @Override
  public int available() throws IOException {
    // long arithmetic so that metaDataBlockSize + dataBlockSize cannot
    // overflow.
    long rawAvailable = in.available();
    // Before the next meta block
    long currentBlockLeft =
        Math.min((long) dataBlockSize - currentDataBlockSize, rawAvailable);
    rawAvailable -= currentBlockLeft;
    // How many full blocks are there?
    long blockPairSize = (long) metaDataBlockSize + dataBlockSize;
    long fullBlocks = rawAvailable / blockPairSize;
    // How many bytes left besides the full blocks?
    rawAvailable = rawAvailable % blockPairSize;
    // Any partial data block?
    long partialBlockLeft = Math.max(0L, rawAvailable - metaDataBlockSize);
    // The sum never exceeds in.available(), so it fits in an int.
    return (int) (currentBlockLeft + fullBlocks * dataBlockSize
        + partialBlockLeft);
  }
}