/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pig.piggybank.storage;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.pig.IndexableLoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextInputFormat;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextOutputFormat;
import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
import org.apache.pig.piggybank.storage.IndexedStorage.IndexedStorageInputFormat.IndexedStorageRecordReader;
import org.apache.pig.piggybank.storage.IndexedStorage.IndexedStorageInputFormat.IndexedStorageRecordReader.IndexedStorageRecordReaderComparator;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.DataReaderWriter;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.util.StorageUtil;
import org.apache.pig.data.DataType;
import org.apache.pig.data.DataByteArray;
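/*
 * Illustrative usage from Pig Latin (a sketch only; the paths, schemas and the
 * indexed field offset below are hypothetical, not part of this class):
 *
 *   -- store the right-hand table with an index on field 0
 *   STORE right INTO '/data/right' USING
 *       org.apache.pig.piggybank.storage.IndexedStorage('\t', '0');
 *
 *   -- later, use it as the right table of a merge-sparse join
 *   left   = LOAD '/data/left' USING PigStorage('\t') AS (id:int, val:chararray);
 *   right  = LOAD '/data/right' USING
 *       org.apache.pig.piggybank.storage.IndexedStorage('\t', '0') AS (id:int, val:chararray);
 *   joined = JOIN left BY id, right BY id USING 'merge-sparse';
 */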
/**
 * <code>IndexedStorage</code> is a form of <code>PigStorage</code> that supports a
 * per-record seek. <code>IndexedStorage</code> creates a separate (hidden) index file for
 * every data file that is written. The format of the index file is:
 * <pre>
 * | Header     |
 * | Index Body |
 * | Footer     |
 * </pre>
 * The Header contains the list of record indices (field numbers) that make up the index keys.
 * The Index Body contains a <code>Tuple</code> for each distinct index key in the data.
 * The fields of the <code>Tuple</code> are:
 * <ul>
 * <li> The index key(s) <code>Tuple</code> </li>
 * <li> The number of records that share this index key. </li>
 * <li> The offset into the data file at which to read the first matching record. </li>
 * </ul>
 * The Footer contains, sequentially:
 * <ul>
 * <li> The smallest key(s) <code>Tuple</code> in the index. </li>
 * <li> The largest key(s) <code>Tuple</code> in the index. </li>
 * <li> The offset in bytes to the start of the footer. </li>
 * </ul>
 *
 * <code>IndexedStorage</code> implements <code>IndexableLoadFunc</code> and
 * can be used as the 'right table' in a Pig 'merge' or 'merge-sparse' join.
 *
 * <code>IndexedStorage</code> does not require the data to be globally partitioned and sorted
 * by index keys, but each partition (separate index) must be locally sorted.
 *
 * Note that <code>IndexedStorage</code> also serves as the demonstration loader for
 * 'merge-sparse' join.
 */
public class IndexedStorage extends PigStorage implements IndexableLoadFunc {
    /**
     * Constructs a Pig loader/storer that uses the specified field delimiter.
     * @param delimiter - field delimiter to use
     * @param offsetsToIndexKeys - comma-separated list of offsets into the Tuple identifying the index keys
     */
    public IndexedStorage(String delimiter, String offsetsToIndexKeys) {
        super(delimiter);
        this.fieldDelimiter = StorageUtil.parseFieldDel(delimiter);

        String[] stroffsetsToIndexKeys = offsetsToIndexKeys.split(",");
        this.offsetsToIndexKeys = new int[stroffsetsToIndexKeys.length];
        for (int i = 0; i < stroffsetsToIndexKeys.length; ++i) {
            this.offsetsToIndexKeys[i] = Integer.parseInt(stroffsetsToIndexKeys[i]);
        }
    }

    @Override
    public OutputFormat getOutputFormat() {
        return new IndexedStorageOutputFormat(fieldDelimiter, offsetsToIndexKeys);
    }

    /**
     * Assumes this list of readers is already sorted except for the provided element.
     * This element is bubbled up the array to its appropriate sort location
     * (faster than re-sorting the whole array).
     */
    private void sortReader(int startIndex) {
        int idx = startIndex;
        while (idx < this.readers.length - 1) {
            IndexedStorageRecordReader reader1 = this.readers[idx];
            IndexedStorageRecordReader reader2 = this.readers[idx + 1];
            if (this.readerComparator.compare(reader1, reader2) <= 0) {
                return;
            }
            this.readers[idx] = reader2;
            this.readers[idx + 1] = reader1;
            idx++;
        }
    }
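    /*
     * Worked example for sortReader (illustrative keys): if the readers' current
     * keys are [8, 3, 9] and only index 0 is out of place, one bubble pass
     * compares 8 with 3 (swap, giving [3, 8, 9]) and then 8 with 9 (already in
     * order), restoring the sorted invariant in O(n) rather than O(n log n).
     */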
    /**
     * Internal OutputFormat class
     */
    public static class IndexedStorageOutputFormat extends PigTextOutputFormat {

        public IndexedStorageOutputFormat(byte delimiter, int[] offsetsToIndexKeys) {
            /* Call the base class constructor */
            super(delimiter);

            this.fieldDelimiter = delimiter;
            this.offsetsToIndexKeys = offsetsToIndexKeys;
        }

        @Override
        public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext context)
                throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            FileSystem fs = FileSystem.get(conf);
            Path file = this.getDefaultWorkFile(context, "");
            FSDataOutputStream fileOut = fs.create(file, false);

            IndexManager indexManager = new IndexManager(offsetsToIndexKeys);
            indexManager.createIndexFile(fs, file);
            return new IndexedStorageRecordWriter(fileOut, this.fieldDelimiter, indexManager);
        }

        /**
         * Internal class to do the actual record writing and index generation
         */
        public static class IndexedStorageRecordWriter extends PigLineRecordWriter {

            public IndexedStorageRecordWriter(FSDataOutputStream fileOut, byte fieldDel, IndexManager indexManager)
                    throws IOException {
                super(fileOut, fieldDel);
                this.fileOut = fileOut;
                this.indexManager = indexManager;

                /* Write the index header first */
                this.indexManager.WriteIndexHeader();
            }

            @Override
            public void write(WritableComparable key, Tuple value) throws IOException {
                /* Remember where the record starts, then write the data */
                long offset = this.fileOut.getPos();
                super.write(key, value);

                /* Build index */
                this.indexManager.BuildIndex(value, offset);
            }

            @Override
            public void close(TaskAttemptContext context) throws IOException {
                this.indexManager.WriterIndexFooter();
                this.indexManager.Close();
                super.close(context);
            }

            /**
             * Output stream for data
             */
            private FSDataOutputStream fileOut;

            /**
             * Index builder
             */
            private IndexManager indexManager = null;
        }

        /**
         * Delimiter to use between fields
         */
        final private byte fieldDelimiter;

        /**
         * Offsets to index keys in a given tuple
         */
        final protected int[] offsetsToIndexKeys;
    }

    @Override
    public InputFormat getInputFormat() {
        return new IndexedStorageInputFormat();
    }

    @Override
    public Tuple getNext() throws IOException {
        if (this.readers == null) {
            return super.getNext();
        }

        while (currentReaderIndexStart < this.readers.length) {
            IndexedStorageRecordReader r = this.readers[currentReaderIndexStart];
            this.prepareToRead(r, null);
            Tuple tuple = super.getNext();
            if (tuple == null) {
                currentReaderIndexStart++;
                r.close();
                continue; // next reader
            }

            // If we haven't yet initialized the indexManager (by reading the first index key)
            if (r.indexManager.lastIndexKeyTuple == null) {
                // initialize the indexManager
                if (r.indexManager.ReadIndex() == null) {
                    // There should never be a non-null record without a corresponding index entry.
                    throw new IOException("Missing Index for Tuple: " + tuple);
                }
            }

            r.indexManager.numberOfTuples--;
            if (r.indexManager.numberOfTuples == 0) {
                if (r.indexManager.ReadIndex() == null) {
                    r.close();
                    currentReaderIndexStart++;
                } else {
                    // Since the index of the current reader has advanced, we may need to push
                    // the current reader back in the sorted list of readers.
                    sortReader(currentReaderIndexStart);
                }
            }
            return tuple;
        }
        return null;
    }

    /**
     * IndexableLoadFunc interface implementation
     */
    @Override
    public void initialize(Configuration conf) throws IOException {
        try {
            InputFormat inputFormat = this.getInputFormat();
            TaskAttemptID id = TaskAttemptID.forName(conf.get("mapred.task.id"));

            if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
                conf.set("mapreduce.job.credentials.binary",
                        System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
            }
            List<FileSplit> fileSplits = inputFormat.getSplits(HadoopShims.createJobContext(conf, null));
            this.readers = new IndexedStorageRecordReader[fileSplits.size()];

            int idx = 0;
            Iterator<FileSplit> it = fileSplits.iterator();
            while (it.hasNext()) {
                FileSplit fileSplit = it.next();
                TaskAttemptContext context = HadoopShims.createTaskAttemptContext(conf, id);
                IndexedStorageRecordReader r =
                        (IndexedStorageRecordReader) inputFormat.createRecordReader(fileSplit, context);
                r.initialize(fileSplit, context);
                this.readers[idx] = r;
                idx++;
            }
            Arrays.sort(this.readers, this.readerComparator);
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
    }
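    /*
     * Worked example for seekNear below (illustrative key ranges): with three
     * sorted readers covering keys [1..5], [4..9] and [12..20], seekNear((7))
     * skips the first reader entirely (7 is past its max key 5, so
     * currentReaderIndexStart advances), repositions the second reader (7 falls
     * within [4..9]), and stops at the third (7 is below its min key 12),
     * leaving the list sorted for the next call.
     */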
    /* The list of readers is always sorted before and after this call. */
    @Override
    public void seekNear(Tuple keys) throws IOException {
        /* Keeps track of the last (if any) reader where seekNear was called */
        int lastIndexModified = -1;

        int idx = currentReaderIndexStart;
        while (idx < this.readers.length) {
            IndexedStorageRecordReader r = this.readers[idx];
            /* The key falls within the range of the reader index */
            if (keys.compareTo(r.indexManager.maxIndexKeyTuple) <= 0
                    && keys.compareTo(r.indexManager.minIndexKeyTuple) >= 0) {
                r.seekNear(keys);
                lastIndexModified = idx;
            /* The key is greater than the current range of the reader index */
            } else if (keys.compareTo(r.indexManager.maxIndexKeyTuple) > 0) {
                currentReaderIndexStart++;
            /* DO NOTHING - the key is less than the current range of the reader index */
            } else {
                break;
            }
            idx++;
        }

        /*
         * There is something to sort.
         * We can rely on the following invariants, which make this check accurate:
         * - currentReaderIndexStart is always >= 0.
         * - lastIndexModified is only non-negative if seekNear was called.
         * - lastIndexModified >= currentReaderIndexStart whenever lastIndexModified >= 0.
         *   This is true because the list is already sorted.
         */
        if (lastIndexModified - currentReaderIndexStart >= 0) {
            /*
             * The following logic is optimized for the (common) case where a tiny number
             * of readers need to be repositioned relative to the other readers in the
             * much larger sorted list.
             */

            /* First, sort the updated readers relative to one another. */
            Arrays.sort(this.readers, currentReaderIndexStart, lastIndexModified + 1, this.readerComparator);

            /* Then, in descending order, push the updated readers back into the sorted list. */
            for (idx = lastIndexModified; idx >= currentReaderIndexStart; idx--) {
                sortReader(idx);
            }
        }
    }

    @Override
    public void close() throws IOException {
        for (IndexedStorageRecordReader reader : this.readers) {
            reader.close();
        }
    }

    /**
     * <code>IndexManager</code> manages the index file (both writing and reading).
     * It keeps track of the last index entry read during reading.
     */
    public static class IndexManager {

        /**
         * Constructor (called during reading)
         * @param ifile index file to read
         */
        public IndexManager(FileStatus ifile) {
            this.indexFile = ifile;
            this.offsetToFooter = -1;
        }

        /**
         * Constructor (called during writing)
         * @param offsetsToIndexKeys offsets into each tuple identifying the index keys
         */
        public IndexManager(int[] offsetsToIndexKeys) {
            this.offsetsToIndexKeys = offsetsToIndexKeys;
            this.offsetToFooter = -1;
        }

        /**
         * Construct the index file path for a given data file.
         * @param file - data file
         * @return - index file path for the given data file
         */
        private static Path getIndexFileName(Path file) {
            return new Path(file.getParent(), "." + file.getName() + ".index");
        }

        /**
         * Open the index file for writing for the given data file.
         * @param fs file system
         * @param file data file
         * @throws IOException
         */
        public void createIndexFile(FileSystem fs, Path file) throws IOException {
            this.indexOut = fs.create(IndexManager.getIndexFileName(file), false);
        }
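        /*
         * For example (hypothetical path): the data file /out/part-m-00000 gets the
         * hidden index file /out/.part-m-00000.index alongside it; the leading dot
         * keeps the index out of Hadoop's default input path filtering, so it is
         * not picked up as ordinary input data.
         */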
        /**
         * Open the index file for reading.
         * @param fs file system
         * @throws IOException
         */
        public void openIndexFile(FileSystem fs) throws IOException {
            this.indexIn = fs.open(this.indexFile.getPath());
        }

        /**
         * Close the index file.
         * @throws IOException
         */
        public void Close() throws IOException {
            this.indexOut.close();
        }

        /**
         * Build the index entry for a tuple written at the given data-file offset.
         * @throws IOException
         */
        private void BuildIndex(Tuple t, long offset) throws IOException {
            /* Build index key tuple */
            Tuple indexKeyTuple = tupleFactory.newTuple(this.offsetsToIndexKeys.length);
            for (int i = 0; i < this.offsetsToIndexKeys.length; ++i) {
                indexKeyTuple.set(i, t.get(this.offsetsToIndexKeys[i]));
            }

            /* Check if we have already seen Tuple(s) with the same index keys */
            if (indexKeyTuple.compareTo(this.lastIndexKeyTuple) == 0) {
                /* We have seen Tuple(s) with these index keys; update the tuple count */
                this.numberOfTuples += 1;
            } else {
                /* Flush the previous index entry, if any */
                if (this.lastIndexKeyTuple != null) {
                    this.WriteIndex();
                }

                this.lastIndexKeyTuple = indexKeyTuple;
                this.minIndexKeyTuple = ((this.minIndexKeyTuple == null)
                        || (indexKeyTuple.compareTo(this.minIndexKeyTuple) < 0))
                        ? indexKeyTuple : this.minIndexKeyTuple;
                this.maxIndexKeyTuple = ((this.maxIndexKeyTuple == null)
                        || (indexKeyTuple.compareTo(this.maxIndexKeyTuple) > 0))
                        ? indexKeyTuple : this.maxIndexKeyTuple;

                /* New index tuple for the newly seen index key */
                this.indexTuple = tupleFactory.newTuple(3);

                /* Add index keys to the index Tuple */
                this.indexTuple.set(0, indexKeyTuple);
                /* Reset the Tuple count for this index key */
                this.numberOfTuples = 1;
                /* Remember the offset to the first Tuple with these index keys */
                this.indexTuple.set(2, offset);
            }
        }

        /**
         * Write the index header (the number of index keys followed by their offsets).
         * @throws IOException
         */
        public void WriteIndexHeader() throws IOException {
            /* Number of index keys */
            indexOut.writeInt(this.offsetsToIndexKeys.length);

            /* Offsets to index keys */
            for (int i = 0; i < this.offsetsToIndexKeys.length; ++i) {
                indexOut.writeInt(this.offsetsToIndexKeys[i]);
            }
        }

        /**
         * Read the index header.
         * @throws IOException
         */
        public void ReadIndexHeader() throws IOException {
            /* Number of index keys */
            int nkeys = this.indexIn.readInt();

            /* Offsets to index keys */
            this.offsetsToIndexKeys = new int[nkeys];
            for (int i = 0; i < nkeys; ++i) {
                offsetsToIndexKeys[i] = this.indexIn.readInt();
            }
        }

        /**
         * Writes the index footer.
         */
        public void WriterIndexFooter() throws IOException {
            /* Flush the index entry for the remaining records */
            this.WriteIndex();

            /* Record the offset to the footer */
            this.offsetToFooter = this.indexOut.getPos();

            /* Write the index footer */
            DataReaderWriter.writeDatum(indexOut, this.minIndexKeyTuple);
            DataReaderWriter.writeDatum(indexOut, this.maxIndexKeyTuple);

            /* Offset to footer */
            indexOut.writeLong(this.offsetToFooter);
        }

        /**
         * Reads the index footer.
         */
        public void ReadIndexFooter() throws IOException {
            long currentOffset = this.indexIn.getPos();

            this.SeekToIndexFooter();
            this.minIndexKeyTuple = (Tuple) DataReaderWriter.readDatum(this.indexIn);
            this.maxIndexKeyTuple = (Tuple) DataReaderWriter.readDatum(this.indexIn);

            this.indexIn.seek(currentOffset);
        }

        /**
         * Seeks to the index footer.
         */
        public void SeekToIndexFooter() throws IOException {
            if (this.offsetToFooter < 0) {
                /* The offset to the footer is stored in the last long (8 bytes) of the file */
                this.indexIn.seek(this.indexFile.getLen() - 8);
                this.offsetToFooter = this.indexIn.readLong();
            }
            this.indexIn.seek(this.offsetToFooter);
        }
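        /*
         * Shape of one index entry (values illustrative): for three consecutive
         * records whose key field is 42, starting at data-file offset 1024, the
         * index body holds the single tuple ((42), 3L, 1024L): the key tuple,
         * the count of records sharing that key, and the offset to the first of them.
         */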
        /**
         * Writes the current index entry.
         */
        public void WriteIndex() throws IOException {
            this.indexTuple.set(1, this.numberOfTuples);
            DataReaderWriter.writeDatum(this.indexOut, this.indexTuple);
        }

        /**
         * Extracts the index key tuple from the index tuple.
         */
        public Tuple getIndexKeyTuple(Tuple indexTuple) throws IOException {
            if (indexTuple.size() == 3) {
                return (Tuple) indexTuple.get(0);
            } else {
                throw new IOException("Invalid index record with size " + indexTuple.size());
            }
        }

        /**
         * Extracts the number of records that share the current key from the index tuple.
         */
        public long getIndexKeyTupleCount(Tuple indexTuple) throws IOException {
            if (indexTuple.size() == 3) {
                return (Long) indexTuple.get(1);
            } else {
                throw new IOException("Invalid index record with size " + indexTuple.size());
            }
        }

        /**
         * Extracts the offset into the data file from the index tuple.
         */
        public long getOffset(Tuple indexTuple) throws IOException {
            if (indexTuple.size() == 3) {
                return (Long) indexTuple.get(2);
            } else {
                throw new IOException("Invalid index record with size " + indexTuple.size());
            }
        }

        /**
         * Reads the next index entry from the index file (or null once the footer
         * is reached) and extracts the index fields.
         */
        public Tuple ReadIndex() throws IOException {
            if (this.indexIn.getPos() < this.offsetToFooter) {
                indexTuple = (Tuple) DataReaderWriter.readDatum(this.indexIn);
                if (indexTuple != null) {
                    this.lastIndexKeyTuple = this.getIndexKeyTuple(indexTuple);
                    this.numberOfTuples = this.getIndexKeyTupleCount(indexTuple);
                }
                return indexTuple;
            }
            return null;
        }

        /**
         * Scans the index looking for a given key.
         * @return the matching index tuple; or the first index tuple whose key is
         * greater than the requested key, if no exact match is found; or null if
         * the index is exhausted.
         */
        public Tuple ScanIndex(Tuple keys) throws IOException {
            if (lastIndexKeyTuple != null && keys.compareTo(this.lastIndexKeyTuple) <= 0) {
                return indexTuple;
            }

            /* Scan the index looking for the given key */
            while ((indexTuple = this.ReadIndex()) != null) {
                if (keys.compareTo(this.lastIndexKeyTuple) > 0) {
                    continue;
                } else {
                    break;
                }
            }
            return indexTuple;
        }
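        /*
         * For instance (illustrative keys): with index entries keyed (3), (7), (9),
         * ScanIndex((7)) stops at and returns the (7) entry; ScanIndex((5)) returns
         * the (7) entry as the first key greater than the request; and
         * ScanIndex((10)) runs off the end of the index body and returns null.
         */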
        /**
         * Stores the list of record indices (field numbers) that identify the index keys.
         */
        private int[] offsetsToIndexKeys = null;

        /**
         * Offset in bytes to the start of the footer of the index.
         */
        private long offsetToFooter = -1;

        /**
         * Output stream used when writing the index.
         */
        FSDataOutputStream indexOut;

        /**
         * Input stream used when reading the index.
         */
        FSDataInputStream indexIn;

        /**
         * Tuple factory used to create index tuples.
         */
        private TupleFactory tupleFactory = TupleFactory.getInstance();

        /**
         * Index tuple of the form:
         * ((tuple of index keys), count of tuples with these index keys, offset to the first such tuple)
         */
        private Tuple indexTuple = tupleFactory.newTuple(3);

        /**
         * "Smallest" index key tuple seen.
         */
        private Tuple minIndexKeyTuple = null;

        /**
         * "Biggest" index key tuple seen.
         */
        private Tuple maxIndexKeyTuple = null;

        /**
         * Last seen index key tuple.
         */
        private Tuple lastIndexKeyTuple = null;

        /**
         * Number of tuples seen for an index key.
         */
        private long numberOfTuples = 0;

        /**
         * The index file.
         */
        private FileStatus indexFile;
    }

    /**
     * Internal InputFormat class
     */
    public static class IndexedStorageInputFormat extends PigTextInputFormat {

        @Override
        public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
            IndexManager im = null;
            try {
                FileSystem fs = FileSystem.get(context.getConfiguration());
                Path indexFile = IndexManager.getIndexFileName(((FileSplit) split).getPath());
                im = new IndexManager(fs.getFileStatus(indexFile));
                im.openIndexFile(fs);
                im.ReadIndexHeader();
                im.ReadIndexFooter();
            } catch (IOException e) {
                /* Log and continue; downstream use of the reader will fail if the index is unusable */
                e.printStackTrace();
            }
            return new IndexedStorageRecordReader(im);
        }

        @Override
        public boolean isSplitable(JobContext context, Path filename) {
            return false;
        }

        /**
         * Internal RecordReader class
         */
        public static class IndexedStorageRecordReader extends RecordReader<LongWritable, Text> {
            private long start;
            private long pos;
            private long end;
            private IndexedStorageLineReader in;
            private int maxLineLength;
            private LongWritable key = null;
            private Text value = null;
            private IndexManager indexManager = null;

            @Override
            public String toString() {
                return indexManager.minIndexKeyTuple + "|" + indexManager.lastIndexKeyTuple + "|"
                        + indexManager.maxIndexKeyTuple;
            }

            public IndexedStorageRecordReader(IndexManager im) {
                this.indexManager = im;
            }

            /**
             * Comparator that orders record readers by their underlying indexes.
             */
            public static class IndexedStorageRecordReaderComparator
                    implements Comparator<IndexedStorageRecordReader> {
                @Override
                public int compare(IndexedStorageRecordReader o1, IndexedStorageRecordReader o2) {
                    /* A reader that has not yet read an index entry sorts by its minimum key */
                    Tuple t1 = (o1.indexManager.lastIndexKeyTuple == null)
                            ? o1.indexManager.minIndexKeyTuple : o1.indexManager.lastIndexKeyTuple;
                    Tuple t2 = (o2.indexManager.lastIndexKeyTuple == null)
                            ? o2.indexManager.minIndexKeyTuple : o2.indexManager.lastIndexKeyTuple;
                    return t1.compareTo(t2);
                }
            }
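            /*
             * Ordering example (illustrative keys): a reader that has consumed no
             * index entries yet, with min key (3), sorts before a reader whose
             * last-read key is (5); once the first reader reads an entry, its
             * last-read key takes over as its sort key.
             */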
            public static class IndexedStorageLineReader {
                private static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
                private int bufferSize = DEFAULT_BUFFER_SIZE;
                private InputStream in;
                private byte[] buffer;
                /** the number of bytes of real data in the buffer */
                private int bufferLength = 0;
                /** the current position in the buffer */
                private int bufferPosn = 0;
                /** the offset of the buffer within the underlying stream */
                private long bufferOffset = 0;

                private static final byte CR = '\r';
                private static final byte LF = '\n';

                /**
                 * Create a line reader that reads from the given stream using the
                 * default buffer-size (64k).
                 * @param in The input stream
                 */
                public IndexedStorageLineReader(InputStream in) {
                    this(in, DEFAULT_BUFFER_SIZE);
                }

                /**
                 * Create a line reader that reads from the given stream using the
                 * given buffer-size.
                 * @param in The input stream
                 * @param bufferSize Size of the read buffer
                 */
                public IndexedStorageLineReader(InputStream in, int bufferSize) {
                    if (!(in instanceof Seekable) || !(in instanceof PositionedReadable)) {
                        throw new IllegalArgumentException(
                                "Input stream is not an instance of Seekable or PositionedReadable");
                    }
                    this.in = in;
                    this.bufferSize = bufferSize;
                    this.buffer = new byte[this.bufferSize];
                }

                /**
                 * Create a line reader that reads from the given stream using the
                 * <code>io.file.buffer.size</code> specified in the given
                 * <code>Configuration</code>.
                 * @param in input stream
                 * @param conf configuration
                 * @throws IOException
                 */
                public IndexedStorageLineReader(InputStream in, Configuration conf) throws IOException {
                    this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE));
                }

                /**
                 * Close the underlying stream.
                 * @throws IOException
                 */
                public void close() throws IOException {
                    in.close();
                }

                /**
                 * Read one line from the InputStream into the given Text. A line
                 * can be terminated by one of the following: '\n' (LF), '\r' (CR),
                 * or '\r\n' (CR+LF). EOF also terminates an otherwise unterminated
                 * line.
                 *
                 * @param str the object to store the given line (without the newline)
                 * @param maxLineLength the maximum number of bytes to store into str;
                 *  the rest of the line is silently discarded.
                 * @param maxBytesToConsume the maximum number of bytes to consume
                 *  in this call. This is only a hint, because if the line crosses
                 *  this threshold, we allow it to happen. It can overshoot
                 *  potentially by as much as one buffer length.
                 *
                 * @return the number of bytes read, including the (longest) newline
                 * found.
                 *
                 * @throws IOException if the underlying stream throws
                 */
                public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
                    /* We're reading data from in, but the head of the stream may already
                     * be buffered in buffer, so we have several cases:
                     * 1. No newline characters are in the buffer, so we need to copy
                     *    everything and read another buffer from the stream.
                     * 2. An unambiguously terminated line is in the buffer, so we just
                     *    copy it to str.
                     * 3. An ambiguously terminated line is in the buffer, i.e. the buffer
                     *    ends in CR. In this case we copy everything up to the CR to str,
                     *    but we also need to see what follows the CR: if it's LF, then we
                     *    need to consume the LF as well, so the next call to readLine
                     *    will read from after that.
                     * We use a flag prevCharCR to signal if the previous character was CR
                     * and, if it happens to be at the end of the buffer, delay
                     * consuming it until we have a chance to look at the char that
                     * follows.
                     */
                    str.clear();
                    int txtLength = 0;           // tracks str.getLength(), as an optimization
                    int newlineLength = 0;       // length of the terminating newline
                    boolean prevCharCR = false;  // true if the previous char was CR
                    long bytesConsumed = 0;
                    do {
                        int startPosn = bufferPosn;  // starting from where we left off the last time
                        if (bufferPosn >= bufferLength) {
                            startPosn = bufferPosn = 0;
                            if (prevCharCR)
                                ++bytesConsumed;  // account for the CR from the previous read
                            bufferOffset = ((Seekable) in).getPos();
                            bufferLength = in.read(buffer);
                            if (bufferLength <= 0)
                                break;  // EOF
                        }
                        for (; bufferPosn < bufferLength; ++bufferPosn) {  // search for newline
                            if (buffer[bufferPosn] == LF) {
                                newlineLength = (prevCharCR) ? 2 : 1;
                                ++bufferPosn;  // at the next invocation proceed from the following byte
                                break;
                            }
                            if (prevCharCR) {  // CR + notLF, we are at notLF
                                newlineLength = 1;
                                break;
                            }
                            prevCharCR = (buffer[bufferPosn] == CR);
                        }
                        int readLength = bufferPosn - startPosn;
                        if (prevCharCR && newlineLength == 0)
                            --readLength;  // CR at the end of the buffer
                        bytesConsumed += readLength;
                        int appendLength = readLength - newlineLength;
                        if (appendLength > maxLineLength - txtLength) {
                            appendLength = maxLineLength - txtLength;
                        }
                        if (appendLength > 0) {
                            str.append(buffer, startPosn, appendLength);
                            txtLength += appendLength;
                        }
                    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

                    if (bytesConsumed > (long) Integer.MAX_VALUE)
                        throw new IOException("Too many bytes before newline: " + bytesConsumed);
                    return (int) bytesConsumed;
                }
                /**
                 * Read from the InputStream into the given Text.
                 * @param str the object to store the given line
                 * @param maxLineLength the maximum number of bytes to store into str
                 * @return the number of bytes read, including the newline
                 * @throws IOException if the underlying stream throws
                 */
                public int readLine(Text str, int maxLineLength) throws IOException {
                    return readLine(str, maxLineLength, Integer.MAX_VALUE);
                }

                /**
                 * Read from the InputStream into the given Text.
                 * @param str the object to store the given line
                 * @return the number of bytes read, including the newline
                 * @throws IOException if the underlying stream throws
                 */
                public int readLine(Text str) throws IOException {
                    return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE);
                }

                /**
                 * If the given offset is within the buffer, adjust the buffer position to
                 * read from it; otherwise seek to the given offset from the start of the file.
                 * @param offset
                 * @throws IOException
                 */
                public void seek(long offset) throws IOException {
                    if ((offset >= bufferOffset) && (offset < (bufferOffset + bufferLength))) {
                        bufferPosn = (int) (offset - bufferOffset);
                    } else {
                        /* Invalidate the buffer and seek the underlying stream */
                        bufferPosn = bufferLength;
                        ((Seekable) in).seek(offset);
                    }
                }
            }

            @Override
            public void initialize(InputSplit genericSplit, TaskAttemptContext context)
                    throws IOException, InterruptedException {
                FileSplit split = (FileSplit) genericSplit;
                Configuration job = context.getConfiguration();
                this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
                start = split.getStart();
                end = start + split.getLength();
                final Path file = split.getPath();

                FileSystem fs = file.getFileSystem(job);
                FSDataInputStream fileIn = fs.open(split.getPath());
                boolean skipFirstLine = false;
                if (start != 0) {
                    skipFirstLine = true;
                    --start;
                    fileIn.seek(start);
                }
                in = new IndexedStorageLineReader(fileIn, job);
                if (skipFirstLine) {
                    start += in.readLine(new Text(), 0,
                            (int) Math.min((long) Integer.MAX_VALUE, end - start));
                }
                this.pos = start;
            }

            public void seek(long offset) throws IOException {
                in.seek(offset);
                pos = offset;
            }

            /**
             * Scan the index for the given key and seek to the appropriate offset in the data.
             * @param keys keys to look for
             * @return true if the given key was found, false otherwise
             * @throws IOException
             */
            public boolean seekNear(Tuple keys) throws IOException {
                boolean ret = false;

                Tuple indexTuple = this.indexManager.ScanIndex(keys);
                if (indexTuple != null) {
                    long offset = this.indexManager.getOffset(indexTuple);
                    in.seek(offset);
                    if (keys.compareTo(this.indexManager.getIndexKeyTuple(indexTuple)) == 0) {
                        ret = true;
                    }
                }
                return ret;
            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                if (key == null) {
                    key = new LongWritable();
                }
                key.set(pos);
                if (value == null) {
                    value = new Text();
                }
                int newSize = 0;
                while (pos < end) {
                    newSize = in.readLine(value, maxLineLength,
                            Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
                    if (newSize == 0) {
                        break;
                    }
                    pos += newSize;
                    if (newSize < maxLineLength) {
                        break;
                    }
                }
                if (newSize == 0) {
                    key = null;
                    value = null;
                    return false;
                } else {
                    return true;
                }
            }

            @Override
            public LongWritable getCurrentKey() throws IOException, InterruptedException {
                return key;
            }

            @Override
            public Text getCurrentValue() throws IOException, InterruptedException {
                return value;
            }

            @Override
            public float getProgress() throws IOException, InterruptedException {
                if (start == end) {
                    return 0.0f;
                } else {
                    return Math.min(1.0f, (pos - start) / (float) (end - start));
                }
            }

            @Override
            public void close() throws IOException {
                if (in != null) {
                    in.close();
                }
            }
        }
    }

    /**
     * List of record readers.
     */
    protected IndexedStorageRecordReader[] readers = null;
    /**
     * Index into the list of readers to the current reader.
     * Readers before this index have been fully scanned for keys.
     */
    protected int currentReaderIndexStart = 0;

    /**
     * Delimiter to use between fields.
     */
    protected byte fieldDelimiter = '\t';

    /**
     * Offsets to index keys in each tuple.
     */
    final protected int[] offsetsToIndexKeys;

    /**
     * Comparator used to compare key tuples.
     */
    protected Comparator<IndexedStorageRecordReader> readerComparator =
            new IndexedStorageInputFormat.IndexedStorageRecordReader.IndexedStorageRecordReaderComparator();
}