/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.hadoop.mapreduce;
import alluxio.client.keyvalue.KeyValueIterator;
import alluxio.client.keyvalue.KeyValuePair;
import alluxio.client.keyvalue.KeyValuePartitionReader;
import alluxio.client.keyvalue.KeyValueSystem;
import alluxio.exception.AlluxioException;
import alluxio.util.io.BufferUtils;
import com.google.common.base.Preconditions;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
import javax.annotation.concurrent.ThreadSafe;
/**
 * A {@link RecordReader} that reads the key-value pairs stored in one partition of the
 * {@link KeyValueSystem} and supplies them to the Mapper one record at a time.
 *
 * <p>All public methods synchronize on this instance, so the reader's state is always accessed
 * under the same lock.
 */
@ThreadSafe
final class KeyValueRecordReader extends RecordReader<BytesWritable, BytesWritable> {
  /** The partition reader for reading the key-value pairs. */
  private KeyValuePartitionReader mReader;
  /** The iterator for iterating through all key-value pairs contained in the partition. */
  private KeyValueIterator mKeyValuePairIterator;
  /** Number of key-value pairs visited by the iterator. */
  private int mNumVisitedKeyValuePairs;
  /** Number of key-value pairs in the partition, as reported by the partition reader. */
  private int mNumKeyValuePairs;
  /** Current key; the same instance is reused and overwritten for every record. */
  private BytesWritable mCurrentKey;
  /** Current value; the same instance is reused and overwritten for every record. */
  private BytesWritable mCurrentValue;

  /**
   * Creates a {@link KeyValueRecordReader} for generating key-value pairs of a partition.
   * {@link #initialize(InputSplit, TaskAttemptContext)} must be called before reading records.
   */
  public KeyValueRecordReader() {}

  /**
   * Opens the partition described by the split and resets the iteration state.
   *
   * @param split the split to read; must be a {@link KeyValueInputSplit}
   * @param context the task attempt context (not used by this reader)
   * @throws IllegalArgumentException if {@code split} is not a {@link KeyValueInputSplit}
   * @throws IOException if the partition reader cannot be created or queried; an
   *         {@link AlluxioException} is wrapped as the cause
   */
  @Override
  public synchronized void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    // Single validation point: same exception type as before, now with a descriptive message.
    Preconditions.checkArgument(split instanceof KeyValueInputSplit,
        "Split is required to be KeyValueInputSplit");
    try {
      mReader =
          KeyValuePartitionReader.Factory.create(((KeyValueInputSplit) split).getPartitionId());
      mKeyValuePairIterator = mReader.iterator();
      mNumVisitedKeyValuePairs = 0;
      mNumKeyValuePairs = mReader.size();
      mCurrentKey = new BytesWritable();
      mCurrentValue = new BytesWritable();
    } catch (AlluxioException e) {
      throw new IOException(e);
    }
  }

  /**
   * @return the key of the record most recently loaded by {@link #nextKeyValue()}
   */
  @Override
  public synchronized BytesWritable getCurrentKey() {
    return mCurrentKey;
  }

  /**
   * @return the value of the record most recently loaded by {@link #nextKeyValue()}
   */
  @Override
  public synchronized BytesWritable getCurrentValue() {
    return mCurrentValue;
  }

  /**
   * Advances to the next key-value pair of the partition, copying its contents into the current
   * key and value objects.
   *
   * @return {@code true} if a pair was loaded, {@code false} if the iterator is exhausted
   * @throws IOException if reading the next pair fails; an {@link AlluxioException} is wrapped
   *         as the cause
   */
  @Override
  public synchronized boolean nextKeyValue() throws IOException {
    if (!mKeyValuePairIterator.hasNext()) {
      return false;
    }

    KeyValuePair pair;
    try {
      pair = mKeyValuePairIterator.next();
    } catch (AlluxioException e) {
      throw new IOException(e);
    }

    // TODO(cc): Implement a ByteBufferInputStream which is backed by a ByteBuffer so we could
    // benefit from zero-copy.
    // Copy the bytes straight into the reusable writables; wrapping them in a temporary
    // BytesWritable first would only add an extra allocation and copy.
    byte[] keyBytes = BufferUtils.newByteArrayFromByteBuffer(pair.getKey());
    mCurrentKey.set(keyBytes, 0, keyBytes.length);
    byte[] valueBytes = BufferUtils.newByteArrayFromByteBuffer(pair.getValue());
    mCurrentValue.set(valueBytes, 0, valueBytes.length);

    mNumVisitedKeyValuePairs++;
    return true;
  }

  /**
   * Closes the underlying partition reader, if one was opened.
   *
   * @throws IOException if closing the partition reader fails
   */
  @Override
  public synchronized void close() throws IOException {
    // mReader is null when initialize() was never called or failed before assigning it.
    if (mReader != null) {
      mReader.close();
    }
  }

  /**
   * @return the fraction of key-value pairs visited so far, or {@code 1.0} when the partition
   *         holds no pairs
   */
  @Override
  public synchronized float getProgress() throws IOException {
    if (mNumKeyValuePairs == 0) {
      return 1.0f;
    }
    return ((float) mNumVisitedKeyValuePairs) / mNumKeyValuePairs;
  }
}