package com.thinkaurelius.faunus.mapreduce.transform;
import com.thinkaurelius.faunus.FaunusVertex;
import com.thinkaurelius.faunus.Tokens;
import com.thinkaurelius.faunus.mapreduce.util.EmptyConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
/**
 * Holder for a mapper that marks a configured set of vertices, together with
 * the helper that builds the Hadoop configuration carrying that set.
 *
 * <p>The vertex ids are stored in the job configuration under {@link #IDS}.
 * For every incoming vertex the mapper calls {@code startPath()} when the
 * vertex id is in the configured set and {@code clearPaths()} otherwise, then
 * emits the vertex.
 *
 * @author Marko A. Rodriguez (http://markorodriguez.com)
 */
public class VertexMap {

    /** Configuration key under which the selected vertex ids are stored. */
    public static final String IDS = Tokens.makeNamespace(VertexMap.class) + ".ids";

    /** Hadoop counters reported by {@link Map}. */
    public enum Counters {
        /** Incremented once for each vertex whose id is in the configured set. */
        VERTICES_PROCESSED
    }

    /**
     * Builds a configuration that stores the given vertex ids under {@link #IDS}.
     *
     * @param ids the ids of the vertices to select; may be empty
     * @return a new configuration containing the ids in their decimal string form
     */
    public static Configuration createConfiguration(final long... ids) {
        final String[] idStrings = new String[ids.length];
        for (int i = 0; i < ids.length; i++) {
            idStrings[i] = String.valueOf(ids[i]);
        }
        final Configuration configuration = new EmptyConfiguration();
        configuration.setStrings(IDS, idStrings);
        return configuration;
    }

    /**
     * Mapper that emits every vertex it receives, after calling
     * {@code startPath()} on vertices whose id is configured under
     * {@link VertexMap#IDS} and {@code clearPaths()} on all others.
     */
    public static class Map extends Mapper<NullWritable, FaunusVertex, NullWritable, FaunusVertex> {

        /** Ids read from the configuration in {@link #setup}; duplicates collapse because a set is used. */
        private Collection<Long> ids;

        /**
         * Reads the selected vertex ids from the job configuration.
         *
         * @param context the task context supplying the configuration
         * @throws NumberFormatException if a configured id is not a valid long
         */
        @Override
        public void setup(final Mapper<NullWritable, FaunusVertex, NullWritable, FaunusVertex>.Context context) throws IOException, InterruptedException {
            //todo: make as list and double up repeats
            this.ids = getLongCollection(context.getConfiguration(), IDS, new HashSet<Long>());
        }

        /**
         * Starts a path on the vertex when its id is selected, clears its paths
         * otherwise, and writes the vertex out in both cases.
         *
         * @param key     ignored input key
         * @param value   the vertex to process; it is modified in place and emitted
         * @param context the task context used for counters and output
         */
        @Override
        public void map(final NullWritable key, final FaunusVertex value, final Mapper<NullWritable, FaunusVertex, NullWritable, FaunusVertex>.Context context) throws IOException, InterruptedException {
            if (this.ids.contains(value.getIdAsLong())) {
                value.startPath();
                // Only selected vertices count as processed.
                context.getCounter(Counters.VERTICES_PROCESSED).increment(1L);
            } else {
                value.clearPaths();
            }
            context.write(NullWritable.get(), value);
        }

        /**
         * Parses the string list stored under {@code key} into longs and adds
         * them to {@code collection}.
         *
         * @param conf       the configuration to read from
         * @param key        the property holding the list of ids
         * @param collection the collection that receives the parsed ids
         * @return {@code collection}, left unchanged when the property yields no values
         * @throws NumberFormatException if a token is not a valid long
         */
        private static Collection<Long> getLongCollection(final Configuration conf, final String key, final Collection<Long> collection) {
            // getStrings may return null when the property is unset (e.g. no ids
            // were supplied); treat that as "nothing selected" instead of failing
            // with a NullPointerException.
            final String[] values = conf.getStrings(key);
            if (values == null) {
                return collection;
            }
            for (final String value : values) {
                // Tokens are not trimmed by getStrings, so tolerate "1, 2" style values.
                collection.add(Long.valueOf(value.trim()));
            }
            return collection;
        }
    }
}