/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.hadoop.mapreduce;
import alluxio.AlluxioURI;
import alluxio.annotation.PublicApi;
import alluxio.client.file.FileSystemContext;
import alluxio.client.keyvalue.KeyValueMasterClient;
import alluxio.client.keyvalue.KeyValueSystem;
import alluxio.thrift.PartitionInfo;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.concurrent.ThreadSafe;
/**
 * Implementation of {@code org.apache.hadoop.mapreduce.InputFormat} for MapReduce programs to
 * access {@link KeyValueSystem}.
 * <p>
 * It takes a {@link KeyValueSystem} URI, and emits key-value pairs stored in the KeyValueStore to
 * {@code org.apache.hadoop.mapreduce.Mapper}s. Both keys and values are exposed as
 * {@link BytesWritable}.
 */
@PublicApi
@ThreadSafe
public final class KeyValueInputFormat extends InputFormat<BytesWritable, BytesWritable> {
  /** Client used to look up the partitions of each input key-value store. */
  private final KeyValueMasterClient mKeyValueMasterClient =
      new KeyValueMasterClient(FileSystemContext.INSTANCE.getMasterAddress());

  /**
   * Constructs a new {@link KeyValueInputFormat}.
   */
  public KeyValueInputFormat() {}

  /**
   * Returns a list of {@link KeyValueInputSplit} where each split is one key-value partition.
   *
   * @param jobContext MapReduce job configuration
   * @return list of {@link InputSplit}s, each split is a partition
   * @throws IOException if the partition information of an input path cannot be retrieved; an
   *         {@link IOException} raised by the lookup is propagated unchanged, any other exception
   *         is wrapped as the cause of a new {@link IOException}
   */
  @Override
  public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
    // The paths are MapReduce program's inputs specified in
    // {@code mapreduce.input.fileinputformat.inputdir}, each path should be a key-value store.
    Path[] paths = FileInputFormat.getInputPaths(jobContext);
    List<InputSplit> splits = new ArrayList<>();
    try {
      for (Path path : paths) {
        List<PartitionInfo> partitionInfos =
            mKeyValueMasterClient.getPartitionInfo(new AlluxioURI(path.toString()));
        // One split per partition of the store.
        for (PartitionInfo partitionInfo : partitionInfos) {
          splits.add(new KeyValueInputSplit(partitionInfo));
        }
      }
    } catch (Exception e) {
      // Do not double-wrap: an IOException already satisfies this method's contract, and
      // rethrowing it as-is keeps its concrete type and message visible to the caller.
      if (e instanceof IOException) {
        throw (IOException) e;
      }
      throw new IOException(e);
    }
    return splits;
  }

  /**
   * Creates a new {@link KeyValueRecordReader}.
   * <p>
   * Neither argument is used here; the reader is returned uninitialized. Per the Hadoop
   * {@code InputFormat} contract the framework is expected to call
   * {@code RecordReader#initialize} with the split before reading from it.
   *
   * @param inputSplit the split to be read (unused by this method)
   * @param taskContext the context of the task attempt (unused by this method)
   * @return a new {@link KeyValueRecordReader}
   * @throws IOException declared by the overridden method; not thrown by this implementation
   *         directly
   */
  @Override
  public RecordReader<BytesWritable, BytesWritable> createRecordReader(InputSplit inputSplit,
      TaskAttemptContext taskContext) throws IOException {
    return new KeyValueRecordReader();
  }
}