package com.thinkaurelius.faunus.formats.rexster;
import com.thinkaurelius.faunus.FaunusVertex;
import com.thinkaurelius.faunus.formats.VertexQueryFilter;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Hadoop {@link InputFormat} that emits {@link FaunusVertex} values (with {@link NullWritable} keys)
 * read through a {@link RexsterRecordReader}.
 * <p/>
 * The input is partitioned into contiguous offset ranges derived from the estimated vertex count
 * supplied by the {@link RexsterConfiguration}; the instance must be configured through
 * {@link #setConf(Configuration)} before splits or record readers are requested.
 *
 * @author Stephen Mallette (http://stephen.genoprime.com)
 */
public class RexsterInputFormat extends InputFormat<NullWritable, FaunusVertex> implements Configurable {

    /** Estimated number of vertices, as reported by the Rexster configuration in {@link #setConf}. */
    private long estimatedVertexCount;

    /** Rexster-specific view of the Hadoop configuration; {@code null} until {@link #setConf} is called. */
    private RexsterConfiguration rexsterConf;

    /** Vertex query filter created from the Hadoop configuration in {@link #setConf}. */
    private VertexQueryFilter vertexQuery;

    /**
     * Divides the estimated vertex range into {@code mapred.map.tasks} splits of equal size.
     * The final split is open-ended ({@link Long#MAX_VALUE}) because the vertex count is only an estimate.
     *
     * @param jobContext the job context whose configuration supplies {@code mapred.map.tasks}
     * @return one {@link RexsterInputSplit} per chunk; always at least one split
     */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException, InterruptedException {
        // A configured value of zero would cause a division by zero below and a negative value would
        // silently yield no splits at all, so fall back to a single chunk in both cases.
        final int chunks = Math.max(1, jobContext.getConfiguration().getInt("mapred.map.tasks", 1));
        final long chunkSize = this.estimatedVertexCount / chunks;

        final List<InputSplit> splits = new ArrayList<InputSplit>();
        for (int i = 0; i < chunks; i++) {
            final long start = i * chunkSize;
            // the last chunk should run to rexster's end. since this calculation
            // runs on an estimated count there's no way to know the exact end value.
            final boolean lastChunk = (i + 1) == chunks;
            final long end = lastChunk ? Long.MAX_VALUE : start + chunkSize;
            // NOTE(review): whether the end bound is inclusive or exclusive is decided by
            // RexsterInputSplit / RexsterRecordReader - confirm there.
            splits.add(new RexsterInputSplit(start, end));
        }

        return splits;
    }

    /**
     * Creates a record reader backed by the Rexster configuration and vertex query filter
     * established in {@link #setConf(Configuration)}.
     *
     * @param inputSplit         the split to read (not inspected here)
     * @param taskAttemptContext the task attempt context (not inspected here)
     * @return a new {@link RexsterRecordReader}
     */
    @Override
    public RecordReader<NullWritable, FaunusVertex> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        return new RexsterRecordReader(this.rexsterConf, this.vertexQuery);
    }

    /**
     * Initializes this input format from the Hadoop configuration: wraps it in a
     * {@link RexsterConfiguration}, caches the estimated vertex count and builds the vertex query filter.
     *
     * @param config the Hadoop configuration to read settings from
     */
    @Override
    public void setConf(Configuration config) {
        this.rexsterConf = new RexsterConfiguration(config);
        this.estimatedVertexCount = this.rexsterConf.getEstimatedVertexCount();
        this.vertexQuery = VertexQueryFilter.create(config);
    }

    /**
     * Returns the configuration held by the underlying {@link RexsterConfiguration}.
     *
     * @return the configuration, or {@code null} if {@link #setConf(Configuration)} has not been called yet
     */
    @Override
    public Configuration getConf() {
        // Avoid a NullPointerException when queried before configuration.
        return this.rexsterConf == null ? null : this.rexsterConf.getConf();
    }
}