package com.thinkaurelius.faunus.formats.script;
import com.thinkaurelius.faunus.FaunusVertex;
import com.thinkaurelius.faunus.formats.VertexQueryFilter;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
/**
* ScriptInputFormat supports the arbitrary parsing of a \n-based file format.
* Each line of the file is passed to the Gremlin/Groovy script identified by the faunus.input.script.file property.
* The Gremlin/Groovy file must have a method with the following signature:
* <p/>
* def boolean read(FaunusVertex vertex, String line) { ... }
* <p/>
* The FaunusVertex argument is a reusable object to avoid object creation (see FaunusVertex.reuse(long)).
* The String argument is the \n-line out of the file at the faunus.input.location.
* The boolean denotes whether or not the provided line yielded a successful creation of a FaunusVertex.
*
* @author Marko A. Rodriguez (http://markorodriguez.com)
*/
public class ScriptInputFormat extends FileInputFormat<NullWritable, FaunusVertex> implements Configurable {
public static final String FAUNUS_GRAPH_INPUT_SCRIPT_FILE = "faunus.graph.input.script.file";
private VertexQueryFilter vertexQuery;
private Configuration config;
@Override
public RecordReader<NullWritable, FaunusVertex> createRecordReader(final InputSplit split, final TaskAttemptContext context) throws IOException {
return new ScriptRecordReader(this.vertexQuery, context);
}
@Override
protected boolean isSplitable(final JobContext context, final Path file) {
return null == new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
}
@Override
public void setConf(final Configuration config) {
this.config = config;
this.vertexQuery = VertexQueryFilter.create(config);
}
@Override
public Configuration getConf() {
return this.config;
}
}