package com.hadooparchitecturebook.zombie.giraph;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.giraph.edge.DefaultEdge;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.io.formats.TextVertexInputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.log4j.Logger;
public class ZombieTextVertexInputFormat extends TextVertexInputFormat<LongWritable, Text, LongWritable> {
private static final Logger LOG = Logger.getLogger(ZombieTextVertexInputFormat.class);
@Override
public TextVertexReader createVertexReader(
InputSplit split, TaskAttemptContext context) throws IOException {
return new ZombieTextReader();
}
//InputFormat
//{vertexId}|{Type}|{common seperated vertexId of bitable people}
public class ZombieTextReader extends TextVertexReader {
@Override
public boolean nextVertex() throws IOException, InterruptedException {
return getRecordReader().nextKeyValue();
}
@Override
public Vertex<LongWritable, Text, LongWritable> getCurrentVertex()
throws IOException, InterruptedException {
Text line = getRecordReader().getCurrentValue();
String[] majorParts = line.toString().split("\\|");
LongWritable id = new LongWritable(Long.parseLong(majorParts[0]));
Text value = new Text(majorParts[1]);
ArrayList<Edge<LongWritable, LongWritable>> edgeIdList = new ArrayList<Edge<LongWritable, LongWritable>>();
if (majorParts.length > 2) {
String[] edgeIds = majorParts[2].split(",");
for (String edgeId: edgeIds) {
DefaultEdge<LongWritable, LongWritable> edge = new DefaultEdge<LongWritable, LongWritable>();
LongWritable longEdgeId = new LongWritable(Long.parseLong(edgeId));
edge.setTargetVertexId(longEdgeId);
edge.setValue(longEdgeId); // dummy value
edgeIdList.add(edge);
}
}
Vertex<LongWritable, Text, LongWritable> vertex = getConf().createVertex();
vertex.initialize(id, value, edgeIdList);
return vertex;
}
}
}