/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.sqoop.io;

import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.io.output.CloseShieldOutputStream;
import org.apache.commons.io.output.CountingOutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.CompressorStream;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.DecompressorStream;

import com.cloudera.sqoop.io.LobReaderCache;
import com.cloudera.sqoop.util.RandomHash;

/**
 * File format which stores large object records.
 * The format allows large objects to be read through individual InputStreams
 * to allow reading without full materialization of a single record.
 * Each record is assigned an id and can be accessed by id efficiently by
 * consulting an index at the end of the file.
 *
 * The LobFile format is specified at:
 * http://wiki.github.com/cloudera/sqoop/sip-3
 */
public final class LobFile {

  public static final Log LOG = LogFactory.getLog(LobFile.class.getName());

  public static final int LATEST_LOB_VERSION = 0;
  public static final char[] HEADER_ID_STR = { 'L', 'O', 'B' };

  // Value for entryId to write to the beginning of an IndexSegment.
  public static final long SEGMENT_HEADER_ID = -1;

  // Value for entryId to write before the finale.
  public static final long SEGMENT_OFFSET_ID = -2;

  // Value for entryId to write before the IndexTable.
  public static final long INDEX_TABLE_ID = -3;

  private LobFile() {
  }
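  /*
   * A minimal usage sketch based on the factory methods below (the 'path',
   * 'conf', 'claimedLen', and 'recordBytes' variables are hypothetical;
   * error handling is elided):
   *
   *   LobFile.Writer writer = LobFile.create(path, conf, false);
   *   OutputStream os = writer.writeBlobRecord(claimedLen);
   *   os.write(recordBytes);   // write the record body.
   *   writer.finishRecord();   // implicit before the next record.
   *   writer.close();          // writes the index and finale.
   *
   *   LobFile.Reader reader = LobFile.open(path, conf);
   *   while (reader.next()) {
   *     InputStream is = reader.readBlobRecord();
   *     // Consume 'is' before calling next() again...
   *   }
   *   reader.close();
   */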
  /**
   * Creates a LobFile Reader configured to read from the specified file.
   */
  public static com.cloudera.sqoop.io.LobFile.Reader open(Path p,
      Configuration conf) throws IOException {
    FileSystem fs = p.getFileSystem(conf);
    FileStatus [] stats = fs.listStatus(p);
    if (null == stats || stats.length == 0) {
      throw new IOException("Could not find file: " + p);
    }

    FSDataInputStream fis = fs.open(p);
    DataInputStream dis = new DataInputStream(fis);
    LobFileHeader header = new LobFileHeader(dis);
    int version = header.getVersion();
    if (version == 0) {
      return new V0Reader(p, conf, header, dis, fis, stats[0].getLen());
    } else {
      throw new IOException("No reader available for LobFile version "
          + version);
    }
  }

  /**
   * Creates a LobFile Writer.
   * @param p the path to create.
   * @param conf the configuration to use to interact with the filesystem.
   * @param isCharData true if this is for CLOBs, false for BLOBs.
   * @param codec the compression codec to use (or null for none).
   * @param entriesPerSegment number of entries per index segment.
   */
  public static com.cloudera.sqoop.io.LobFile.Writer create(Path p,
      Configuration conf, boolean isCharData, String codec,
      int entriesPerSegment) throws IOException {
    return new V0Writer(p, conf, isCharData, codec, entriesPerSegment);
  }

  /**
   * Creates a LobFile Writer.
   * @param p the path to create.
   * @param conf the configuration to use to interact with the filesystem.
   * @param isCharData true if this is for CLOBs, false for BLOBs.
   * @param codec the compression codec to use (or null for none).
   */
  public static com.cloudera.sqoop.io.LobFile.Writer create(Path p,
      Configuration conf, boolean isCharData, String codec)
      throws IOException {
    return create(p, conf, isCharData, codec,
        V0Writer.DEFAULT_MAX_SEGMENT_ENTRIES);
  }

  /**
   * Creates a LobFile Writer configured for uncompressed data.
   * @param p the path to create.
   * @param conf the configuration to use to interact with the filesystem.
   * @param isCharData true if this is for CLOBs, false for BLOBs.
   */
  public static com.cloudera.sqoop.io.LobFile.Writer create(Path p,
      Configuration conf, boolean isCharData) throws IOException {
    return create(p, conf, isCharData, null);
  }

  /**
   * Creates a LobFile Writer configured for uncompressed binary data.
   * @param p the path to create.
   * @param conf the configuration to use to interact with the filesystem.
   */
  public static com.cloudera.sqoop.io.LobFile.Writer create(Path p,
      Configuration conf) throws IOException {
    return create(p, conf, false);
  }

  /**
   * Class that writes out a LobFile. Instantiate via LobFile.create().
   */
  public abstract static class Writer implements Closeable {

    /**
     * If this Writer is writing to a physical LobFile, then this returns
     * the file path it is writing to. Otherwise it returns null.
     * @return the fully-qualified path being written to by this writer.
     */
    public abstract Path getPath();

    /**
     * Finishes writing the LobFile and closes underlying handles.
     */
    public abstract void close() throws IOException;

    @Override
    protected synchronized void finalize() throws Throwable {
      close();
      super.finalize();
    }

    /**
     * Terminates the current record and writes any trailing zero-padding
     * required by the specified record size.
     * This is implicitly called between consecutive writeBlobRecord() /
     * writeClobRecord() calls.
     */
    public abstract void finishRecord() throws IOException;

    /**
     * Declares a new BLOB record to be written to the file.
     * @param len the "claimed" number of bytes that will be written to
     * this record. The actual number of bytes may differ.
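     * @return an OutputStream to which the record body should be written.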
     */
    public abstract OutputStream writeBlobRecord(long len) throws IOException;

    /**
     * Declares a new CLOB record to be written to the file.
     * @param len the claimed number of characters that will be written to
     * this record. The actual number of characters may differ.
     */
    public abstract java.io.Writer writeClobRecord(long len)
        throws IOException;

    /**
     * Report the current position in the output file.
     * @return the number of bytes written through this Writer.
     */
    public abstract long tell() throws IOException;

    /**
     * Checks whether an underlying stream is present or null.
     * @param out the stream to check for null-ness.
     * @throws IOException if out is null.
     */
    protected void checkForNull(OutputStream out) throws IOException {
      if (null == out) {
        throw new IOException("Writer has been closed.");
      }
    }
  }

  /**
   * Class that can read a LobFile. Create with LobFile.open().
   */
  public abstract static class Reader implements Closeable {

    /**
     * If this Reader is reading from a physical LobFile, then this returns
     * the file path it is reading from. Otherwise it returns null.
     * @return the fully-qualified path being read by this reader.
     */
    public abstract Path getPath();

    /**
     * Report the current position in the file. Note that the internal
     * cursor may move in an unpredictable fashion; e.g., to fetch
     * additional data from the index stored at the end of the file.
     * Clients may be more interested in the getRecordOffset() method
     * which returns the starting offset of the current record.
     * @return the current offset from the start of the file in bytes.
     */
    public abstract long tell() throws IOException;

    /**
     * Move the file pointer to the first available full record beginning at
     * position 'pos', relative to the start of the file. After calling
     * seek(), you will need to call next() to move to the record itself.
     * @param pos the position to seek to or past.
     */
    public abstract void seek(long pos) throws IOException;

    /**
     * Advances to the next record in the file.
     * @return true if another record exists, or false if the
     * end of the file has been reached.
     */
    public abstract boolean next() throws IOException;

    /**
     * @return true if we have aligned the Reader (through a call to next())
     * onto a record.
     */
    public abstract boolean isRecordAvailable();

    /**
     * Reports the length of the record to the user.
     * If next() has not been called, or seek() has been called without
     * a subsequent call to next(), or next() returned false, the return
     * value of this method is undefined.
     * @return the 'claimedLen' field of the current record. For
     * character-based records, this is often in characters, not bytes.
     * Records may have more bytes associated with them than are reported
     * by this method, but never fewer.
     */
    public abstract long getRecordLen();

    /**
     * Return the entryId of the current record to the user.
     * If next() has not been called, or seek() has been called without
     * a subsequent call to next(), or next() returned false, the return
     * value of this method is undefined.
     * @return the 'entryId' field of the current record.
     */
    public abstract long getRecordId();

    /**
     * Return the byte offset at which the current record starts.
     * If next() has not been called, or seek() has been called without
     * a subsequent call to next(), or next() returned false, the return
     * value of this method is undefined.
     * @return the byte offset of the beginning of the current record.
     */
    public abstract long getRecordOffset();

    /**
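     * The returned stream remains valid only until the next call to
     * next(), seek(), or close(), any of which implicitly closes it.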
     * @return an InputStream allowing the user to read the next binary
     * record from the file.
     */
    public abstract InputStream readBlobRecord() throws IOException;

    /**
     * @return a java.io.Reader allowing the user to read the next character
     * record from the file.
     */
    public abstract java.io.Reader readClobRecord() throws IOException;

    /**
     * Closes the reader.
     */
    public abstract void close() throws IOException;

    /**
     * Checks whether an underlying stream is present or null.
     * @param in the stream to check for null-ness.
     * @throws IOException if in is null.
     */
    protected void checkForNull(InputStream in) throws IOException {
      if (null == in) {
        throw new IOException("Reader has been closed.");
      }
    }

    /**
     * @return true if the Reader.close() method has been called.
     */
    public abstract boolean isClosed();

    @Override
    protected synchronized void finalize() throws Throwable {
      close();
      super.finalize();
    }
  }

  /**
   * Represents a header block in a LobFile. Can write a new header
   * block (and generate a record start mark), or read an existing
   * header block.
   */
  private static class LobFileHeader implements Writable {

    private int version;
    private RecordStartMark startMark;
    private MetaBlock metaBlock;

    /**
     * Create a new LobFileHeader.
     */
    public LobFileHeader() {
      this.version = LATEST_LOB_VERSION;
      this.startMark = new RecordStartMark();
      this.metaBlock = new MetaBlock();
    }

    /**
     * Read a LobFileHeader from an existing file.
     */
    public LobFileHeader(DataInput in) throws IOException {
      readFields(in);
    }

    /**
     * Write a LobFile header to an output sink.
     */
    public void write(DataOutput out) throws IOException {
      // Start with the file type identification.
      for (char c : HEADER_ID_STR) {
        out.writeByte((int) c);
      }

      // Write the format version.
      WritableUtils.writeVInt(out, this.version);
      startMark.write(out);
      metaBlock.write(out);
    }

    public void readFields(DataInput in) throws IOException {
      char [] chars = new char[3];
      for (int i = 0; i < 3; i++) {
        chars[i] = (char) in.readByte();
      }

      // Check that these match what we expect. Throws IOE if not.
      checkHeaderChars(chars);

      this.version = WritableUtils.readVInt(in);
      if (this.version != LATEST_LOB_VERSION) {
        // Right now we only have one version we can handle.
        throw new IOException("Unexpected LobFile version " + this.version);
      }

      this.startMark = new RecordStartMark(in);
      this.metaBlock = new MetaBlock(in);
    }

    /**
     * Checks that a header stamp matches the standard LobFile header.
     * The stamp must be exactly HEADER_ID_STR.length characters long.
     * @param headerStamp the header bytes received from the file.
     * @throws IOException if the stamp does not match.
     */
    private void checkHeaderChars(char [] headerStamp) throws IOException {
      if (headerStamp.length != HEADER_ID_STR.length) {
        throw new IOException("Invalid LobFile header stamp: expected length "
            + HEADER_ID_STR.length);
      }
      for (int i = 0; i < HEADER_ID_STR.length; i++) {
        if (headerStamp[i] != HEADER_ID_STR[i]) {
          throw new IOException("Invalid LobFile header stamp");
        }
      }
    }

    /**
     * @return the format version number for this LobFile.
     */
    public int getVersion() {
      return version;
    }

    /**
     * @return the RecordStartMark for this LobFile.
     */
    public RecordStartMark getStartMark() {
      return startMark;
    }

    /**
     * @return the MetaBlock for this LobFile.
     */
    public MetaBlock getMetaBlock() {
      return metaBlock;
    }
  }

  /**
   * Holds a RecordStartMark -- a 16 byte randomly-generated
   * sync token. Can read a RSM from an input source, or can
   * generate a new one.
   */
  private static class RecordStartMark implements Writable {

    // This is a 16-byte array.
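    // Readers scan the file for this marker to re-synchronize on a record
    // or index boundary after seeking to an arbitrary offset.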
    public static final int START_MARK_LENGTH = 16;

    private byte [] startBytes;

    public RecordStartMark() {
      generateStartMark();
    }

    public RecordStartMark(DataInput in) throws IOException {
      readFields(in);
    }

    public byte [] getBytes() {
      byte [] out = new byte[START_MARK_LENGTH];
      System.arraycopy(this.startBytes, 0, out, 0, START_MARK_LENGTH);
      return out;
    }

    public void readFields(DataInput in) throws IOException {
      this.startBytes = new byte[START_MARK_LENGTH];
      in.readFully(this.startBytes);
    }

    public void write(DataOutput out) throws IOException {
      out.write(this.startBytes);
    }

    /**
     * Generate a new random RecordStartMark.
     */
    private void generateStartMark() {
      this.startBytes = RandomHash.generateMD5Bytes();
    }
  }

  /**
   * Represents the metadata block stored in the header of a LobFile.
   */
  private static class MetaBlock extends AbstractMap<String, BytesWritable>
      implements Writable {

    // Strings which typically appear in the metablock have canonical names.
    public static final String ENTRY_ENCODING_KEY = "EntryEncoding";
    public static final String COMPRESSION_CODEC_KEY = "CompressionCodec";
    public static final String ENTRIES_PER_SEGMENT_KEY = "EntriesPerSegment";

    // Standard entry encodings.
    public static final String CLOB_ENCODING = "CLOB";
    public static final String BLOB_ENCODING = "BLOB";

    private Map<String, BytesWritable> entries;

    public MetaBlock() {
      entries = new TreeMap<String, BytesWritable>();
    }

    public MetaBlock(DataInput in) throws IOException {
      entries = new TreeMap<String, BytesWritable>();
      readFields(in);
    }

    public MetaBlock(Map<String, BytesWritable> map) {
      entries = new TreeMap<String, BytesWritable>();
      for (Map.Entry<String, BytesWritable> entry : map.entrySet()) {
        entries.put(entry.getKey(), entry.getValue());
      }
    }

    @Override
    public Set<Map.Entry<String, BytesWritable>> entrySet() {
      return entries.entrySet();
    }

    @Override
    public BytesWritable put(String k, BytesWritable v) {
      BytesWritable old = entries.get(k);
      entries.put(k, v);
      return old;
    }

    public BytesWritable put(String k, String v) {
      try {
        return put(k, new BytesWritable(v.getBytes("UTF-8")));
      } catch (UnsupportedEncodingException uee) {
        // Shouldn't happen; UTF-8 is always supported.
        throw new RuntimeException(uee);
      }
    }

    @Override
    public BytesWritable get(Object k) {
      return entries.get(k);
    }

    public String getString(Object k) {
      BytesWritable bytes = get(k);
      if (null == bytes) {
        return null;
      } else {
        try {
          return new String(bytes.getBytes(), 0, bytes.getLength(), "UTF-8");
        } catch (UnsupportedEncodingException uee) {
          // Shouldn't happen; UTF-8 is always supported.
          throw new RuntimeException(uee);
        }
      }
    }

    public void readFields(DataInput in) throws IOException {
      int numEntries = WritableUtils.readVInt(in);
      entries.clear();
      for (int i = 0; i < numEntries; i++) {
        String key = Text.readString(in);
        BytesWritable val = new BytesWritable();
        val.readFields(in);
        entries.put(key, val);
      }
    }

    public void write(DataOutput out) throws IOException {
      int numEntries = entries.size();
      WritableUtils.writeVInt(out, numEntries);
      for (Map.Entry<String, BytesWritable> entry : entries.entrySet()) {
        Text.writeString(out, entry.getKey());
        entry.getValue().write(out);
      }
    }
  }

  /**
   * Describes an IndexSegment. This is one entry in the IndexTable. It
   * holds the physical location of the IndexSegment in the file, as well
   * as the range of entryIds and byte ranges corresponding to records
   * described by the index subset in the IndexSegment.
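   * On disk, an entry is serialized as four consecutive VLongs:
   * segmentOffset, firstIndexId, firstIndexOffset, lastIndexOffset.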
   */
  private static class IndexTableEntry implements Writable {
    private long segmentOffset;
    private long firstIndexId;
    private long firstIndexOffset;
    private long lastIndexOffset;

    public IndexTableEntry() {
    }

    public IndexTableEntry(DataInput in) throws IOException {
      readFields(in);
    }

    private void setSegmentOffset(long offset) {
      this.segmentOffset = offset;
    }

    private void setFirstIndexId(long id) {
      this.firstIndexId = id;
    }

    private void setFirstIndexOffset(long offset) {
      this.firstIndexOffset = offset;
    }

    private void setLastIndexOffset(long offset) {
      this.lastIndexOffset = offset;
    }

    public void write(DataOutput out) throws IOException {
      WritableUtils.writeVLong(out, segmentOffset);
      WritableUtils.writeVLong(out, firstIndexId);
      WritableUtils.writeVLong(out, firstIndexOffset);
      WritableUtils.writeVLong(out, lastIndexOffset);
    }

    public void readFields(DataInput in) throws IOException {
      segmentOffset = WritableUtils.readVLong(in);
      firstIndexId = WritableUtils.readVLong(in);
      firstIndexOffset = WritableUtils.readVLong(in);
      lastIndexOffset = WritableUtils.readVLong(in);
    }

    /**
     * @return the entryId of the first record indexed by this segment.
     */
    public long getFirstIndexId() {
      return this.firstIndexId;
    }

    /**
     * @return the offset of the first record indexed by this segment.
     */
    public long getFirstIndexOffset() {
      return this.firstIndexOffset;
    }

    /**
     * @return the offset of the last record indexed by this segment.
     */
    public long getLastIndexOffset() {
      return this.lastIndexOffset;
    }

    /**
     * @return the offset from the start of the file of the IndexSegment
     * data itself.
     */
    public long getSegmentOffset() {
      return this.segmentOffset;
    }

    /**
     * Reports whether a record starting at the given offset would be
     * indexed by this IndexSegment or an earlier one. Note that if this
     * returns true, the requested offset may actually lie in a previous
     * IndexSegment.
     * @param off the offset of the start of a record to test.
     * @return true if the user's requested offset is in this
     * or a previous IndexSegment.
     */
    public boolean containsOffset(long off) {
      return off <= getLastIndexOffset();
    }
  }

  /**
   * Class that represents the IndexSegment entries in a LobIndex.
   */
  private static class IndexSegment implements Writable {

    // The main body of the IndexSegment: the record lengths
    // of all the records in the IndexSegment.
    private BytesWritable recordLenBytes;

    // The length of the previously recorded field (used when
    // generating an index). Intermediate state used in calculation
    // of the lastIndexOffset.
    private long prevLength;

    // Used to write VLong-encoded lengths into a temp
    // array, which are then copied into recordLenBytes.
    private DataOutputBuffer outputBuffer;

    // The IndexTableEntry that describes this IndexSegment in the IndexTable.
    private IndexTableEntry tableEntry;

    public IndexSegment(IndexTableEntry tableEntry) {
      this.recordLenBytes = new BytesWritable();
      this.outputBuffer = new DataOutputBuffer(10); // max VLong size.
      this.tableEntry = tableEntry;
    }

    /**
     * Read an IndexSegment from an existing file.
     */
    public IndexSegment(IndexTableEntry tableEntry, DataInput in)
        throws IOException {
      this.recordLenBytes = new BytesWritable();
      this.outputBuffer = new DataOutputBuffer(10);
      this.tableEntry = tableEntry;
      readFields(in);
    }

    /**
     * @return the IndexTableEntry describing this IndexSegment in the
     * IndexTable.
     */
    public IndexTableEntry getTableEntry() {
      return tableEntry;
    }

    /**
     * Add a recordLength to the recordLenBytes array.
     */
    public void addRecordLen(long recordLen) throws IOException {
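      // Record lengths are stored as a packed sequence of VLongs in
      // recordLenBytes; append the new length at the end.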
      // Allocate space for the new bytes.
      int numBytes = WritableUtils.getVIntSize(recordLen);
      recordLenBytes.setSize(recordLenBytes.getLength() + numBytes);

      // Write the new bytes into a temporary buffer wrapped in a DataOutput.
      outputBuffer.reset();
      WritableUtils.writeVLong(outputBuffer, recordLen);

      // Then copy those new bytes into the end of the recordLenBytes array.
      System.arraycopy(outputBuffer.getData(), 0,
          recordLenBytes.getBytes(),
          recordLenBytes.getLength() - numBytes, numBytes);

      // Now that we've added a new recordLength to the array,
      // it's the last index. We need to calculate its offset.
      // This is based on how long the previous record was.
      this.tableEntry.setLastIndexOffset(
          this.tableEntry.getLastIndexOffset() + this.prevLength);

      // Save this record's length (unserialized) for calculating
      // lastIndexOffset for the next record.
      this.prevLength = recordLen;
    }

    public void write(DataOutput out) throws IOException {
      // Write the SEGMENT_HEADER_ID to distinguish this from a LobRecord.
      WritableUtils.writeVLong(out, SEGMENT_HEADER_ID);

      // The length of the main body of the segment is the length of the
      // data byte array.
      int segmentBytesLen = recordLenBytes.getLength();
      WritableUtils.writeVLong(out, segmentBytesLen);

      // Write the body of the segment.
      out.write(recordLenBytes.getBytes(), 0, segmentBytesLen);
    }

    public void readFields(DataInput in) throws IOException {
      // After the RecordStartMark, we expect to get a SEGMENT_HEADER_ID (-1).
      long segmentId = WritableUtils.readVLong(in);
      if (SEGMENT_HEADER_ID != segmentId) {
        throw new IOException("Expected segment header id "
            + SEGMENT_HEADER_ID + "; got " + segmentId);
      }

      // Get the length of the rest of the segment, in bytes.
      long length = WritableUtils.readVLong(in);

      // Now read the actual main byte array.
      if (length > Integer.MAX_VALUE) {
        throw new IOException("Unexpected oversize data array length: "
            + length);
      } else if (length < 0) {
        throw new IOException("Unexpected undersize data array length: "
            + length);
      }
      byte [] segmentData = new byte[(int) length];
      in.readFully(segmentData);
      recordLenBytes = new BytesWritable(segmentData);
      reset(); // Reset the iterator allowing the user to yield offset/lengths.
    }

    // The following methods are used by a Reader to walk through the index
    // segment and get data about the records described in this segment of
    // the index.

    private DataInputBuffer dataInputBuf;

    // The following two fields are advanced by the next() method.
    private long curOffset; // offset into the file of the current record.
    private long curLen;    // length of the current record in bytes.

    // Used to allow rewindOnce() to go backwards a single position in the
    // iterator.
    private int prevInputBufPos; // prev offset into dataInputBuf.
    private long prevOffset;
    private long prevLen;

    /**
     * Resets the record index iterator.
     */
    public void reset() {
      this.dataInputBuf = null;
    }

    /**
     * Aligns the iteration capability to return info about the next
     * record in the IndexSegment. Must be called before the first
     * record.
     * @return true if there is another record described in this IndexSegment.
     */
    public boolean next() {
      this.prevOffset = this.curOffset;

      if (null == dataInputBuf) {
        // We need to set up the iterator; this is the first use.
        if (null == recordLenBytes) {
          return false; // We don't have any records?
        }
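        // First use: wrap the packed record lengths in a DataInputBuffer
        // and start at this segment's first record offset.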
        this.dataInputBuf = new DataInputBuffer();
        this.dataInputBuf.reset(recordLenBytes.getBytes(), 0,
            recordLenBytes.getLength());
        this.curOffset = this.tableEntry.getFirstIndexOffset();
        this.prevOffset = 0;
      } else {
        this.curOffset += this.curLen;
      }

      boolean available =
          dataInputBuf.getPosition() < dataInputBuf.getLength();
      if (available) {
        this.prevInputBufPos = dataInputBuf.getPosition();
        // Then read out the next record length.
        try {
          this.prevLen = this.curLen;
          this.curLen = WritableUtils.readVLong(dataInputBuf);
        } catch (IOException ioe) {
          // Shouldn't happen; data in DataInputBuffer is materialized.
          throw new RuntimeException(ioe);
        }
      }

      return available;
    }

    /**
     * Undoes a single call to next(). This cannot be called twice in a row;
     * before calling this again, next() must be called in the interim. This
     * makes a subsequent call to next() yield the same iterated values as the
     * previous call.
     */
    public void rewindOnce() {
      // Move the buffer backwards so we deserialize the same VLong with
      // the next call.
      if (prevInputBufPos == 0) {
        // We actually rewound the first next() in the iterator.
        // Just reset the iterator to the beginning. Otherwise we'll
        // backfill it with bogus data.
        reset();
      } else {
        // Use the normal codepath; move the serialization buffer backwards.
        dataInputBuf.reset(recordLenBytes.getBytes(), prevInputBufPos,
            recordLenBytes.getLength() - prevInputBufPos);

        // And restore the previously-yielded values.
        this.curLen = this.prevLen;
        this.curOffset = this.prevOffset;
      }
    }

    /**
     * Returns the length of the current record.
     * You must call next() and it must return true before calling this
     * method.
     * @return the length in bytes of the current record.
     */
    public long getCurRecordLen() {
      return curLen;
    }

    /**
     * Returns the offset of the current record from the beginning of the
     * file.
     * You must call next() and it must return true before calling this
     * method.
     * @return the offset in bytes from the beginning of the file for the
     * current record.
     */
    public long getCurRecordStart() {
      return curOffset;
    }
  }

  /**
   * Stores the locations and ranges indexed by each IndexSegment.
   */
  private static class IndexTable
      implements Iterable<IndexTableEntry>, Writable {

    private List<IndexTableEntry> tableEntries;

    public IndexTable() {
      tableEntries = new ArrayList<IndexTableEntry>();
    }

    public IndexTable(DataInput in) throws IOException {
      readFields(in);
    }

    public void readFields(DataInput in) throws IOException {
      long recordTypeId = WritableUtils.readVLong(in);
      if (recordTypeId != INDEX_TABLE_ID) {
        // We expected to read an IndexTable.
        throw new IOException("Expected IndexTable; got record with typeId="
            + recordTypeId);
      }

      int tableCount = WritableUtils.readVInt(in);
      tableEntries = new ArrayList<IndexTableEntry>(tableCount);
      for (int i = 0; i < tableCount; i++) {
        tableEntries.add(new IndexTableEntry(in));
      }
    }

    public void write(DataOutput out) throws IOException {
      // Start with the record type id.
      WritableUtils.writeVLong(out, INDEX_TABLE_ID);

      // Then the count of the records.
      WritableUtils.writeVInt(out, tableEntries.size());

      // Followed by the table itself.
      for (IndexTableEntry entry : tableEntries) {
        entry.write(out);
      }
    }

    public void add(IndexTableEntry entry) {
      tableEntries.add(entry);
    }

    public IndexTableEntry get(int i) {
      return tableEntries.get(i);
    }

    public int size() {
      return tableEntries.size();
    }

    public Iterator<IndexTableEntry> iterator() {
      return tableEntries.iterator();
    }
  }

  /**
   * Reader implementation for LobFile format version 0.
   * Acquire with LobFile.open().
   */
  private static class V0Reader
      extends com.cloudera.sqoop.io.LobFile.Reader {
    public static final Log LOG = LogFactory.getLog(
        V0Reader.class.getName());

    // Forward seeks of up to this size are performed by reading, not seeking.
    private static final long MAX_CONSUMPTION_WIDTH = 512 * 1024;

    private LobFileHeader header;
    private Configuration conf;

    // Codec to use to decompress the file.
    private CompressionCodec codec;
    private Decompressor decompressor;

    // Length of the entire file.
    private long fileLen;

    // State bit set to true after we've called next() and successfully
    // aligned on a record. If true, we can hand an InputStream back to
    // the user.
    private boolean isAligned;

    // After we've aligned on a record, this contains the record's
    // reported length. In the presence of compression, etc, this may
    // not represent its true length in the file.
    private long claimedRecordLen;

    // After we've aligned on a record, this contains its entryId.
    private long curEntryId;

    // After we've aligned on a record, this contains the offset of the
    // beginning of its RSM from the start of the file.
    private long curRecordOffset;

    // After we've aligned on a record, this contains the record's
    // true length from the index.
    private long indexRecordLen;

    // tmp buffer used to consume RecordStartMarks during alignment.
    private byte [] tmpRsmBuf;

    // The actual file stream itself, which we can move around (e.g. with
    // seeking).
    private FSDataInputStream underlyingInput;

    // The data deserializer we typically place on top of this.
    // If we use underlyingInput.seek(), then we instantiate a new
    // dataIn on top of it.
    private DataInputStream dataIn;

    // The user accesses the current record through a stream memoized here.
    // We retain a pointer here so that we can forcibly close the old
    // userInputStream when they want to align on the next record.
    private InputStream userInputStream;

    // The current index segment to read record lengths from.
    private IndexSegment curIndexSegment;

    // The offset into the indexTable of the curIndexSegment.
    private int curIndexSegmentId;

    // The IndexTable that provides fast pointers to the IndexSegments.
    private IndexTable indexTable;

    // The path being opened.
    private Path path;

    // Users should use LobFile.open() instead of directly calling this.
    V0Reader(Path path, Configuration conf, LobFileHeader header,
        DataInputStream dis, FSDataInputStream stream, long fileLen)
        throws IOException {
      this.path = LobReaderCache.qualify(path, conf);
      this.conf = conf;
      this.header = header;
      this.dataIn = dis;
      this.underlyingInput = stream;
      this.isAligned = false;
      this.tmpRsmBuf = new byte[RecordStartMark.START_MARK_LENGTH];
      this.fileLen = fileLen;

      LOG.debug("Opening LobFile path: " + path);
      openCodec();
      openIndex();
    }

    /**
     * If the user has specified a compression codec in the header metadata,
     * create an instance of it.
     */
    private void openCodec() throws IOException {
      String codecName = header.getMetaBlock().getString(
          MetaBlock.COMPRESSION_CODEC_KEY);
      if (null != codecName) {
        LOG.debug("Decompressing file with codec: " + codecName);
        this.codec = CodecMap.getCodec(codecName, conf);
        if (null != this.codec) {
          this.decompressor = codec.createDecompressor();
        }
      }
    }

    /**
     * Get the first index segment out of the file; determine
     * where that is by loading the index locator at the end of
     * the file.
     */
    private void openIndex() throws IOException {
      // Jump to the end of the file.
      // At the end of the file is a RSM followed by two VLongs;
      // the first of these is the value -2 (one byte) and the
      // second of these is the offset of the beginning of the index (up to
      // 9 bytes).
      internalSeek(fileLen - RecordStartMark.START_MARK_LENGTH - 10);
      byte [] finaleBuffer = new byte[RecordStartMark.START_MARK_LENGTH + 10];
      this.dataIn.readFully(finaleBuffer);

      // Figure out where in the finaleBuffer the RSM actually starts,
      // as the finale might not fully fill the finaleBuffer.
      int rsmStart = findRecordStartMark(finaleBuffer);
      if (-1 == rsmStart) {
        throw new IOException(
            "Corrupt file index; could not find index start offset.");
      }

      // Wrap a buffer around those two vlongs.
      int vlongStart = rsmStart + RecordStartMark.START_MARK_LENGTH;
      DataInputBuffer inBuf = new DataInputBuffer();
      inBuf.reset(finaleBuffer, vlongStart, finaleBuffer.length - vlongStart);
      long offsetMarker = WritableUtils.readVLong(inBuf);
      if (SEGMENT_OFFSET_ID != offsetMarker) {
        // This isn't the correct signature; we got an RSM ahead of some
        // other data.
        throw new IOException("Invalid segment offset id: " + offsetMarker);
      }

      // This will contain the position of the IndexTable.
      long indexTableStart = WritableUtils.readVLong(inBuf);
      LOG.debug("IndexTable begins at " + indexTableStart);
      readIndexTable(indexTableStart);

      // Set up to read records from the beginning of the file. This
      // starts with the first IndexSegment.
      curIndexSegmentId = 0;
      loadIndexSegment();

      // This has moved the file pointer all over but we don't need to
      // worry about resetting it now. The next() method will seek the
      // file pointer to the first record when the user is ready to
      // consume it.
    }

    /**
     * Load the entire IndexTable into memory and decode it.
     */
    private void readIndexTable(long indexTableOffset) throws IOException {
      internalSeek(indexTableOffset);

      // Read the RecordStartMark ahead of the IndexTable.
      this.dataIn.readFully(tmpRsmBuf);
      if (!matchesRsm(tmpRsmBuf)) {
        throw new IOException("Expected record start mark before IndexTable");
      }

      this.indexTable = new IndexTable(dataIn);
    }

    /**
     * Ingest the next IndexSegment.
     */
    private void readNextIndexSegment() throws IOException {
      this.curIndexSegmentId++;
      loadIndexSegment();
    }

    /**
     * Load curIndexSegment with the segment specified by curIndexSegmentId.
     * The file pointer will be moved to the position after this segment.
     * If the segment id does not exist, then the curIndexSegment will be
     * set to null.
     */
    private void loadIndexSegment() throws IOException {
      if (indexTable.size() <= curIndexSegmentId || curIndexSegmentId < 0) {
        // We've iterated past the last IndexSegment. Set this to null
        // and return; the next() method will then return false.
        this.curIndexSegment = null;
        return;
      }

      // Otherwise, seek to the segment and load it.
      IndexTableEntry tableEntry = indexTable.get(curIndexSegmentId);
      long segmentOffset = tableEntry.getSegmentOffset();
      internalSeek(segmentOffset);
      readPositionedIndexSegment();
    }

    /**
     * When the underlying stream is aligned on the RecordStartMark
     * ahead of an IndexSegment, read in the next IndexSegment.
     * After this method the curIndexSegment contains the next
     * IndexSegment to read in the file; if the entire index has been
     * read in this fashion, curIndexSegment will be null.
     */
    private void readPositionedIndexSegment() throws IOException {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Reading index segment at " + tell());
      }

      // Read the RecordStartMark ahead of the IndexSegment.
      this.dataIn.readFully(tmpRsmBuf);
      if (!matchesRsm(tmpRsmBuf)) {
        throw new IOException(
            "Expected record start mark before IndexSegment");
      }

      // Read the IndexSegment proper.
      this.curIndexSegment = new IndexSegment(
          this.indexTable.get(curIndexSegmentId), this.dataIn);
    }

    /**
     * @return true if the bytes in 'buf' starting at 'offset' match
     * the RecordStartMark.
     * @param rsm the RecordStartMark.
     * @param buf the buffer to check.
     * @param offset the offset into buf to begin checking.
     */
    private boolean matchesRsm(byte [] rsm, byte [] buf, int offset) {
      for (int i = 0; i < RecordStartMark.START_MARK_LENGTH; i++) {
        if (buf[i + offset] != rsm[i]) {
          return false; // Mismatch at position i.
        }
      }

      return true; // Matched the whole thing.
    }

    private boolean matchesRsm(byte [] buf, int offset) {
      return matchesRsm(this.header.getStartMark().getBytes(), buf, offset);
    }

    private boolean matchesRsm(byte [] buf) {
      return matchesRsm(buf, 0);
    }

    /**
     * @return the offset in 'buf' where a RecordStartMark begins, or -1
     * if the RecordStartMark is not present in the buffer.
     */
    private int findRecordStartMark(byte [] buf) {
      byte [] rsm = this.header.getStartMark().getBytes();

      for (int i = 0; i < buf.length; i++) {
        if (matchesRsm(rsm, buf, i)) {
          return i;
        }
      }

      return -1; // couldn't find it.
    }

    @Override
    /** {@inheritDoc} */
    public Path getPath() {
      return this.path;
    }

    @Override
    /** {@inheritDoc} */
    public long tell() throws IOException {
      checkForNull(this.underlyingInput);
      return this.underlyingInput.getPos();
    }

    @Override
    /** {@inheritDoc} */
    public void seek(long pos) throws IOException {
      closeUserStream();
      checkForNull(this.underlyingInput);
      this.isAligned = false;
      searchForRecord(pos);
    }

    /**
     * Search the index for the first record starting on or after 'start'.
     * @param start the offset in the file where we should start looking
     * for a record.
     */
    private void searchForRecord(long start) throws IOException {
      LOG.debug("Looking for the first record at/after offset " + start);

      // Scan through the IndexTable until we find the IndexSegment
      // that contains the offset.
      for (int i = 0; i < indexTable.size(); i++) {
        IndexTableEntry tableEntry = indexTable.get(i);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Checking index table entry for range: "
              + tableEntry.getFirstIndexOffset() + ", "
              + tableEntry.getLastIndexOffset());
        }
        if (tableEntry.containsOffset(start)) {
          // Seek to the IndexSegment associated with this tableEntry.
          curIndexSegmentId = i;
          loadIndexSegment();

          // Use this index segment. The record index iterator
          // is at the beginning of the IndexSegment, since we just
          // read it in.
          LOG.debug("Found matching index segment.");
          while (this.curIndexSegment.next()) {
            long curStart = this.curIndexSegment.getCurRecordStart();
            if (curStart >= start) {
              LOG.debug("Found seek target record with offset " + curStart);
              // This is the first record to meet this criterion.
              // Rewind the index iterator by one so that the next()
              // method will do the right thing. next() will also
              // take care of actually seeking to the correct position
              // in the file to read the record proper.
              this.curIndexSegment.rewindOnce();
              return;
            }
          }

          // If it wasn't actually in this IndexSegment, then we've
          // got a corrupt IndexTableEntry; the entry represented that
          // the segment ran longer than it actually does.
          throw new IOException("IndexTableEntry claims last offset of "
              + tableEntry.getLastIndexOffset()
              + " but IndexSegment ends early."
+ " The IndexTable appears corrupt."); } } // If we didn't return inside the loop, then we've searched the entire // file and it's not there. Advance the IndexSegment iterator to // the end of the road so that next() returns false. this.curIndexSegmentId = indexTable.size(); loadIndexSegment(); } /** * Read data from the stream and discard it. * @param numBytes number of bytes to read and discard. */ private void consumeBytes(int numBytes) throws IOException { int remaining = numBytes; while (remaining > 0) { int received = dataIn.skipBytes(remaining); if (received < 1) { throw new IOException("Could not consume additional bytes"); } remaining -= received; } } /** * Seek to position 'pos' (offset from start of file). If this * is nearby, actually just consume data from the underlying * stream rather than doing a real seek. * @param targetPos the position to seek to, expressed as an offset * from the start of the file. */ private void internalSeek(long targetPos) throws IOException { long curPos = this.underlyingInput.getPos(); LOG.debug("Internal seek: target=" + targetPos + "; cur=" + curPos); long distance = targetPos - curPos; if (targetPos == curPos) { LOG.debug("(no motion required)"); return; // We're already there! } else if (targetPos > curPos && distance < MAX_CONSUMPTION_WIDTH) { // We're "close enough" that we should just read it. LOG.debug("Advancing by " + distance + " bytes."); consumeBytes((int) distance); } else { LOG.debug("Direct seek to target"); this.underlyingInput.seek(targetPos); this.dataIn = new DataInputStream(this.underlyingInput); } } /** * Close any stream to an open record that was opened by a user. */ private void closeUserStream() throws IOException { if (this.userInputStream != null) { this.userInputStream.close(); this.userInputStream = null; } } @Override /** {@inheritDoc} */ public boolean next() throws IOException { LOG.debug("Checking for next record"); checkForNull(this.underlyingInput); // If the user has opened a record stream, it is now void. closeUserStream(); this.isAligned = false; // false until proven true. // Get the position of the next record start. // Check the index: is there another record? if (null == curIndexSegment) { LOG.debug("Index is finished; false"); return false; // No index remains. Ergo, no more records. } boolean moreInSegment = curIndexSegment.next(); if (!moreInSegment) { // The current IndexSegment has been exhausted. Move to the next. LOG.debug("Loading next index segment."); readNextIndexSegment(); if (null == curIndexSegment) { LOG.debug("Index is finished; false"); return false; // No index; no records. } // Try again with the next IndexSegment. moreInSegment = curIndexSegment.next(); } if (!moreInSegment) { // Nothing left in the last IndexSegment. LOG.debug("Last index segment is finished; false."); this.curIndexSegment = null; return false; } // Determine where the next record starts. this.indexRecordLen = this.curIndexSegment.getCurRecordLen(); this.curRecordOffset = this.curIndexSegment.getCurRecordStart(); LOG.debug("Next record starts at position: " + this.curRecordOffset + "; indexedLen=" + this.indexRecordLen); // Make sure we're at the target position. internalSeek(this.curRecordOffset); // We are now on top of the next record's RecordStartMark. // Consume the RSM and the record header. this.dataIn.readFully(this.tmpRsmBuf); if (!matchesRsm(tmpRsmBuf)) { // No rsm? No dice. 
throw new IOException("Index contains bogus offset."); } this.curEntryId = WritableUtils.readVLong(this.dataIn); if (this.curEntryId < 0) { // We've moved past the end of the records and started // trying to consume the index. This is the EOF from // the client's perspective. LOG.debug("Indexed position is itself an IndexSegment; false."); return false; } LOG.debug("Aligned on record id=" + this.curEntryId); this.claimedRecordLen = WritableUtils.readVLong(this.dataIn); LOG.debug("Record has claimed length " + this.claimedRecordLen); // We are now aligned on the start of the user's data. this.isAligned = true; return true; } @Override /** {@inheritDoc} */ public boolean isRecordAvailable() { return this.isAligned; } @Override /** {@inheritDoc} */ public long getRecordLen() { return this.claimedRecordLen; } @Override /** {@inheritDoc} */ public long getRecordId() { return this.curEntryId; } @Override /** {@inheritDoc} */ public long getRecordOffset() { return this.curRecordOffset; } @Override /** {@inheritDoc} */ public InputStream readBlobRecord() throws IOException { if (!isRecordAvailable()) { // we're not currently aligned on a record-start. // Try to get the next one. if (!next()) { // No more records available. throw new EOFException("End of file reached."); } } // Ensure any previously-open user record stream is closed. closeUserStream(); // Mark this record as consumed. this.isAligned = false; // The length of the stream we can return to the user is // the indexRecordLen minus the length of any per-record headers. // That includes the RecordStartMark, the entryId, and the claimedLen. long streamLen = this.indexRecordLen - RecordStartMark.START_MARK_LENGTH - WritableUtils.getVIntSize(this.curEntryId) - WritableUtils.getVIntSize(this.claimedRecordLen); LOG.debug("Yielding stream to user with length " + streamLen); this.userInputStream = new FixedLengthInputStream(this.dataIn, streamLen); if (this.codec != null) { // The user needs to decompress the data; wrap the InputStream. decompressor.reset(); this.userInputStream = new DecompressorStream( this.userInputStream, decompressor); } return this.userInputStream; } @Override /** {@inheritDoc} */ public java.io.Reader readClobRecord() throws IOException { // Get a handle to the binary reader and then wrap it. InputStream is = readBlobRecord(); return new InputStreamReader(is); } @Override /** {@inheritDoc} */ public void close() throws IOException { closeUserStream(); if (null != dataIn) { dataIn.close(); dataIn = null; } if (null != underlyingInput) { underlyingInput.close(); underlyingInput = null; } this.isAligned = false; } @Override /** {@inheritDoc} */ public boolean isClosed() { return this.underlyingInput == null; } } /** * Concrete writer implementation for LobFile format version 0. * Instantiate via LobFile.create(). */ private static class V0Writer extends com.cloudera.sqoop.io.LobFile.Writer { public static final Log LOG = LogFactory.getLog( V0Writer.class.getName()); private Configuration conf; private Path path; private boolean isCharData; private LobFileHeader header; private String codecName; private CompressionCodec codec; private Compressor compressor; // The LobIndex we are constructing. private LinkedList<IndexSegment> indexSegments; // Number of entries in the current IndexSegment. private int entriesInSegment; private IndexTable indexTable; // Number of entries that can be written to a single IndexSegment. private int maxEntriesPerSegment; // By default we write this many entries per IndexSegment. 
    // By default we write this many entries per IndexSegment.
    static final int DEFAULT_MAX_SEGMENT_ENTRIES = 4096;

    // Our OutputStream to the underlying file.
    private DataOutputStream out;

    // 'out' is layered on top of this stream, which gives us a count
    // of how much data we've written so far.
    private CountingOutputStream countingOut;

    // State regarding the current record being written.
    private long curEntryId;    // entryId of the current LOB being written.
    private long curClaimedLen; // The user claims a length for a record.

    // The user's OutputStream and/or Writer that writes to us.
    private OutputStream userOutputStream;
    private java.io.Writer userWriter;

    // The userCountingOutputStream may be the same as userOutputStream;
    // but if the user is writing through a compressor, it is actually
    // underneath of it. This tells us how many compressed bytes were
    // really written.
    private CountingOutputStream userCountingOutputStream;

    /**
     * Creates a LobFile Writer for file format version 0.
     * @param p the path to create.
     * @param conf the configuration to use to interact with the filesystem.
     * @param isCharData true if this is for CLOBs, false for BLOBs.
     * @param codecName the compression codec to use (or null for none).
     * @param entriesPerSegment the number of index entries per IndexSegment.
     */
    V0Writer(Path p, Configuration conf, boolean isCharData,
        String codecName, int entriesPerSegment) throws IOException {
      this.path = LobReaderCache.qualify(p, conf);
      this.conf = conf;
      this.isCharData = isCharData;
      this.header = new LobFileHeader();
      this.indexSegments = new LinkedList<IndexSegment>();
      this.indexTable = new IndexTable();
      this.maxEntriesPerSegment = entriesPerSegment;
      this.codecName = codecName;
      if (this.codecName != null) {
        this.codec = CodecMap.getCodec(codecName, conf);
        if (null != this.codec) {
          this.compressor = codec.createCompressor();
        }
      }

      init();
    }

    /**
     * Open the file and write its header.
     */
    private void init() throws IOException {
      FileSystem fs = this.path.getFileSystem(conf);
      FSDataOutputStream fsOut = fs.create(this.path);
      this.countingOut = new CountingOutputStream(
          new BufferedOutputStream(fsOut));
      this.out = new DataOutputStream(this.countingOut);

      // Put any necessary config strings into the header.
      MetaBlock m = this.header.getMetaBlock();
      if (isCharData) {
        m.put(MetaBlock.ENTRY_ENCODING_KEY, MetaBlock.CLOB_ENCODING);
      } else {
        m.put(MetaBlock.ENTRY_ENCODING_KEY, MetaBlock.BLOB_ENCODING);
      }

      if (null != codec) {
        m.put(MetaBlock.COMPRESSION_CODEC_KEY, this.codecName);
      }

      // Serialize the value of maxEntriesPerSegment as a VInt in a byte
      // array and put that into the metablock as ENTRIES_PER_SEGMENT_KEY.
      int segmentBufLen =
          WritableUtils.getVIntSize(this.maxEntriesPerSegment);
      DataOutputBuffer entriesPerSegBuf = new DataOutputBuffer(segmentBufLen);
      WritableUtils.writeVInt(entriesPerSegBuf, this.maxEntriesPerSegment);
      byte [] entriesPerSegArray =
          Arrays.copyOf(entriesPerSegBuf.getData(), segmentBufLen);
      m.put(MetaBlock.ENTRIES_PER_SEGMENT_KEY,
          new BytesWritable(entriesPerSegArray));

      // Write the file header to the file.
      this.header.write(out);

      // Now we're ready to accept record data from the user.
    }

    @Override
    /** {@inheritDoc} */
    public Path getPath() {
      return this.path;
    }

    @Override
    /** {@inheritDoc} */
    public long tell() throws IOException {
      checkForNull(this.out);
      this.out.flush();
      return this.countingOut.getByteCount();
    }

    @Override
    /** {@inheritDoc} */
    public void close() throws IOException {
      finishRecord();
      writeIndex();

      if (this.out != null) {
        this.out.close();
        this.out = null;
      }

      if (this.countingOut != null) {
        this.countingOut.close();
        this.countingOut = null;
      }
    }

    @Override
    /** {@inheritDoc} */
    public void finishRecord() throws IOException {
      if (null != this.userWriter) {
        this.userWriter.close();
        this.userWriter = null;
      }

      if (null != this.userCountingOutputStream) {
        // If there is a wrapping stream for compression,
        // close this first.
        if (null != this.userOutputStream
            && this.userOutputStream != this.userCountingOutputStream) {
          this.userOutputStream.close();
        }

        // Now close the "main" stream.
        this.userCountingOutputStream.close();

        // Write the true length of the current record to the index.
        updateIndex(this.userCountingOutputStream.getByteCount()
            + RecordStartMark.START_MARK_LENGTH
            + WritableUtils.getVIntSize(curEntryId)
            + WritableUtils.getVIntSize(curClaimedLen));

        this.userOutputStream = null;
        this.userCountingOutputStream = null;
      }

      if (null != this.out) {
        out.flush();
      }
    }

    /**
     * Write, in the current IndexSegment, the true compressed length of
     * the record we just finished writing.
     * @param curRecordLen the true length in bytes of the compressed record.
     */
    private void updateIndex(long curRecordLen) throws IOException {
      LOG.debug("Adding index entry: id=" + curEntryId + "; len="
          + curRecordLen);
      indexSegments.getLast().addRecordLen(curRecordLen);
      entriesInSegment++;
      curEntryId++;
    }

    /**
     * Write the index itself to the file.
     */
    private void writeIndex() throws IOException {
      // Write out all the segments in turn.
      // As we do so, reify their offsets into the IndexTable.
      for (IndexSegment segment : indexSegments) {
        long segmentOffset = tell();
        segment.getTableEntry().setSegmentOffset(segmentOffset);
        header.getStartMark().write(out);
        segment.write(out);
      }

      long indexTableStartPos = tell(); // Save for the end of the file.
      LOG.debug("IndexTable offset: " + indexTableStartPos);
      header.getStartMark().write(out);
      indexTable.write(out); // Write the IndexTable record.

      // Write the finale that tells us where the IndexTable begins.
      header.getStartMark().write(out);
      WritableUtils.writeVLong(out, SEGMENT_OFFSET_ID);
      WritableUtils.writeVLong(out, indexTableStartPos);
    }

    /**
     * Prepare to index a new record that will soon be written to the file.
     * If this is the first record in the current IndexSegment, we need
     * to record its entryId and the current file position.
     */
    private void startRecordIndex() throws IOException {
      if (entriesInSegment == maxEntriesPerSegment
          || indexSegments.size() == 0) {
        // The current segment is full. Start a new one.
        this.entriesInSegment = 0;
        IndexTableEntry tableEntry = new IndexTableEntry();
        IndexSegment curSegment = new IndexSegment(tableEntry);
        this.indexSegments.add(curSegment);
        long filePos = tell();
        LOG.debug("Starting IndexSegment; first id=" + curEntryId
            + "; off=" + filePos);
        tableEntry.setFirstIndexId(curEntryId);
        tableEntry.setFirstIndexOffset(filePos);
        tableEntry.setLastIndexOffset(filePos);
        this.indexTable.add(tableEntry);
      }
    }

    @Override
    /** {@inheritDoc} */
    public OutputStream writeBlobRecord(long claimedLen) throws IOException {
      finishRecord(); // finish any previous record.
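      // On-disk record layout: RecordStartMark, entryId (VLong),
      // claimedLen (VLong), then the (possibly compressed) record body.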
      checkForNull(this.out);
      startRecordIndex();
      this.header.getStartMark().write(out);
      LOG.debug("Starting new record; id=" + curEntryId + "; claimedLen="
          + claimedLen);
      WritableUtils.writeVLong(out, curEntryId);
      WritableUtils.writeVLong(out, claimedLen);
      this.curClaimedLen = claimedLen;

      this.userCountingOutputStream = new CountingOutputStream(
          new CloseShieldOutputStream(out));
      if (null == this.codec) {
        // No codec; pass thru the same OutputStream to the user.
        this.userOutputStream = this.userCountingOutputStream;
      } else {
        // Wrap our CountingOutputStream in a compressing OutputStream to
        // give to the user.
        this.compressor.reset();
        this.userOutputStream = new CompressorStream(
            this.userCountingOutputStream, compressor);
      }

      return this.userOutputStream;
    }

    @Override
    /** {@inheritDoc} */
    public java.io.Writer writeClobRecord(long len) throws IOException {
      if (!isCharData) {
        throw new IOException(
            "Can only write CLOB data to a Clob-specific LobFile");
      }

      // Get a binary handle to the record and wrap it in a java.io.Writer.
      writeBlobRecord(len);
      this.userWriter = new OutputStreamWriter(userOutputStream);
      return this.userWriter;
    }
  }
}