package com.thinkaurelius.faunus.mapreduce.sideeffect; import com.thinkaurelius.faunus.FaunusEdge; import com.thinkaurelius.faunus.FaunusVertex; import com.thinkaurelius.faunus.Tokens; import com.thinkaurelius.faunus.mapreduce.util.CounterMap; import com.thinkaurelius.faunus.mapreduce.util.EmptyConfiguration; import com.thinkaurelius.faunus.mapreduce.util.SafeMapperOutputs; import com.thinkaurelius.faunus.mapreduce.util.SafeReducerOutputs; import com.tinkerpop.blueprints.Direction; import com.tinkerpop.blueprints.Edge; import com.tinkerpop.blueprints.Element; import com.tinkerpop.blueprints.Vertex; import com.tinkerpop.gremlin.groovy.jsr223.GremlinGroovyScriptEngine; import groovy.lang.Closure; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import javax.script.ScriptEngine; import javax.script.ScriptException; import java.io.IOException; /** * @author Marko A. Rodriguez (http://markorodriguez.com) */ public class GroupCountMapReduce { public static final String KEY_CLOSURE = Tokens.makeNamespace(GroupCountMapReduce.class) + ".keyClosure"; public static final String VALUE_CLOSURE = Tokens.makeNamespace(GroupCountMapReduce.class) + ".valueClosure"; public static final String CLASS = Tokens.makeNamespace(GroupCountMapReduce.class) + ".class"; private static final ScriptEngine engine = new GremlinGroovyScriptEngine(); public enum Counters { VERTICES_PROCESSED, OUT_EDGES_PROCESSED } public static Configuration createConfiguration(final Class<? extends Element> klass, final String keyClosure, final String valueClosure) { final Configuration configuration = new EmptyConfiguration(); configuration.setClass(CLASS, klass, Element.class); if (null != keyClosure) configuration.set(KEY_CLOSURE, keyClosure); if (null != valueClosure) configuration.set(VALUE_CLOSURE, valueClosure); return configuration; } public static class Map extends Mapper<NullWritable, FaunusVertex, Text, LongWritable> { private Closure keyClosure; private Closure valueClosure; private boolean isVertex; private CounterMap<Object> map; private int mapSpillOver; private SafeMapperOutputs outputs; @Override public void setup(final Mapper.Context context) throws IOException, InterruptedException { try { this.mapSpillOver = context.getConfiguration().getInt(Tokens.FAUNUS_PIPELINE_MAP_SPILL_OVER, Tokens.DEFAULT_MAP_SPILL_OVER); final String keyClosureString = context.getConfiguration().get(KEY_CLOSURE, null); if (null == keyClosureString) this.keyClosure = null; else this.keyClosure = (Closure) engine.eval(keyClosureString); final String valueClosureString = context.getConfiguration().get(VALUE_CLOSURE, null); if (null == valueClosureString) this.valueClosure = null; else this.valueClosure = (Closure) engine.eval(valueClosureString); } catch (final ScriptException e) { throw new IOException(e.getMessage(), e); } this.isVertex = context.getConfiguration().getClass(CLASS, Element.class, Element.class).equals(Vertex.class); this.map = new CounterMap<Object>(); this.outputs = new SafeMapperOutputs(context); } @Override public void map(final NullWritable key, final FaunusVertex value, final Mapper<NullWritable, FaunusVertex, Text, LongWritable>.Context context) throws IOException, InterruptedException { if (this.isVertex) { if (value.hasPaths()) { final Object object = (null == this.keyClosure) ? new FaunusVertex.MicroVertex(value.getIdAsLong()) : this.keyClosure.call(value); final Number number = (null == this.valueClosure) ? 1 : (Number) this.valueClosure.call(value); this.map.incr(object, number.longValue() * value.pathCount()); context.getCounter(Counters.VERTICES_PROCESSED).increment(1l); } } else { long edgesProcessed = 0; for (final Edge e : value.getEdges(Direction.OUT)) { final FaunusEdge edge = (FaunusEdge) e; if (edge.hasPaths()) { final Object object = (null == this.keyClosure) ? new FaunusEdge.MicroEdge(edge.getIdAsLong()) : this.keyClosure.call(edge); final Number number = (null == this.valueClosure) ? 1 : (Number) this.valueClosure.call(edge); this.map.incr(object, number.longValue() * edge.pathCount()); edgesProcessed++; } } context.getCounter(Counters.OUT_EDGES_PROCESSED).increment(edgesProcessed); } // protected against memory explosion if (this.map.size() > this.mapSpillOver) { this.dischargeMap(context); } this.outputs.write(Tokens.GRAPH, NullWritable.get(), value); } private final Text textWritable = new Text(); private final LongWritable longWritable = new LongWritable(); public void dischargeMap(final Mapper<NullWritable, FaunusVertex, Text, LongWritable>.Context context) throws IOException, InterruptedException { for (final java.util.Map.Entry<Object, Long> entry : this.map.entrySet()) { this.textWritable.set(null == entry.getKey() ? Tokens.NULL : entry.getKey().toString()); this.longWritable.set(entry.getValue()); context.write(this.textWritable, this.longWritable); } this.map.clear(); } @Override public void cleanup(final Mapper<NullWritable, FaunusVertex, Text, LongWritable>.Context context) throws IOException, InterruptedException { this.dischargeMap(context); this.outputs.close(); } } public static class Combiner extends Reducer<Text, LongWritable, Text, LongWritable> { private final LongWritable longWritable = new LongWritable(); @Override public void reduce(final Text key, final Iterable<LongWritable> values, final Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException { long totalCount = 0; for (final LongWritable token : values) { totalCount = totalCount + token.get(); } this.longWritable.set(totalCount); context.write(key, this.longWritable); } } public static class Reduce extends Reducer<Text, LongWritable, Text, LongWritable> { private SafeReducerOutputs outputs; @Override public void setup(final Reducer.Context context) throws IOException, InterruptedException { this.outputs = new SafeReducerOutputs(context); } private final LongWritable longWritable = new LongWritable(); @Override public void reduce(final Text key, final Iterable<LongWritable> values, final Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException { long totalCount = 0; for (final LongWritable token : values) { totalCount = totalCount + token.get(); } this.longWritable.set(totalCount); this.outputs.write(Tokens.SIDEEFFECT, key, this.longWritable); } @Override public void cleanup(final Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException { this.outputs.close(); } } }