package com.thinkaurelius.faunus.formats.titan;
import com.thinkaurelius.faunus.FaunusVertex;
import com.thinkaurelius.faunus.Holder;
import com.thinkaurelius.faunus.formats.BlueprintsGraphOutputMapReduce;
import com.thinkaurelius.faunus.formats.JobConfigurationFormat;
import com.thinkaurelius.faunus.formats.MapReduceFormat;
import com.thinkaurelius.faunus.formats.noop.NoOpOutputFormat;
import com.thinkaurelius.faunus.mapreduce.FaunusCompiler;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
/**
 * Base output format for persisting a Faunus graph into a Titan graph database.
 * <p>
 * Extends {@code NoOpOutputFormat} (the actual writes happen inside the appended
 * MapReduce jobs, not through the Hadoop record writer) and implements
 * {@code MapReduceFormat} so the compiler asks this format to contribute the
 * jobs that perform the load.
 *
 * @author Marko A. Rodriguez (http://markorodriguez.com)
 */
public abstract class TitanOutputFormat extends NoOpOutputFormat implements MapReduceFormat {

    /** Configuration key prefix for Titan graph-output settings. */
    public static final String FAUNUS_GRAPH_OUTPUT_TITAN = "faunus.graph.output.titan";

    /** Boolean key controlling whether a schema-inference job is prepended (defaults to {@code true}). */
    public static final String FAUNUS_GRAPH_OUTPUT_TITAN_INFER_SCHEMA = "faunus.graph.output.titan.infer-schema";

    /**
     * Appends the MapReduce jobs that load the graph into Titan:
     * an optional schema-inference pass, followed by a map/reduce stage using
     * {@code BlueprintsGraphOutputMapReduce.VertexMap}/{@code Reduce} and a final
     * map-only stage using {@code BlueprintsGraphOutputMapReduce.EdgeMap}
     * (presumably writing vertices first, then edges — the class names suggest
     * this ordering; confirm against BlueprintsGraphOutputMapReduce).
     *
     * @param compiler the job compiler to which the Titan-loading jobs are added
     */
    @Override
    public void addMapReduceJobs(final FaunusCompiler compiler) {
        // Schema inference is on by default; disable via faunus.graph.output.titan.infer-schema.
        if (compiler.getConf().getBoolean(FAUNUS_GRAPH_OUTPUT_TITAN_INFER_SCHEMA, true)) {
            compiler.addMapReduce(SchemaInferencerMapReduce.Map.class,
                    null,
                    SchemaInferencerMapReduce.Reduce.class,
                    LongWritable.class,
                    FaunusVertex.class,
                    NullWritable.class,
                    FaunusVertex.class,
                    SchemaInferencerMapReduce.createConfiguration());
        }
        // Map/reduce stage keyed by vertex id (LongWritable -> Holder), emitting FaunusVertex.
        compiler.addMapReduce(BlueprintsGraphOutputMapReduce.VertexMap.class,
                null,
                BlueprintsGraphOutputMapReduce.Reduce.class,
                LongWritable.class,
                Holder.class,
                NullWritable.class,
                FaunusVertex.class,
                BlueprintsGraphOutputMapReduce.createConfiguration());
        // Map-only stage; a fresh configuration is created on purpose rather than
        // sharing the previous stage's instance, so the two jobs stay independent.
        compiler.addMap(BlueprintsGraphOutputMapReduce.EdgeMap.class,
                NullWritable.class,
                FaunusVertex.class,
                BlueprintsGraphOutputMapReduce.createConfiguration());
    }
}