package ldbc.snb.datagen.hadoop;

import ldbc.snb.datagen.dictionary.Dictionaries;
import ldbc.snb.datagen.generator.DatagenParams;
import ldbc.snb.datagen.generator.LDBCDatagen;
import ldbc.snb.datagen.generator.PersonActivityGenerator;
import ldbc.snb.datagen.objects.Knows;
import ldbc.snb.datagen.objects.Person;
import ldbc.snb.datagen.serializer.PersonActivitySerializer;
import ldbc.snb.datagen.serializer.UpdateEventSerializer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;

/**
 * Hadoop job that generates the person activity (forums, posts, comments and likes)
 * of every person block and serializes it with the configured person activity serializer.
 *
 * @author aprat
 */
public class HadoopPersonActivityGenerator {
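    /**
     * Reducer that receives all the persons of a block, generates their activity and,
     * if update streams are enabled, exports the corresponding update events.
     */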
    public static class HadoopPersonActivityGeneratorReducer extends Reducer<BlockKey, Person, LongWritable, Person> {

        /** The id of the reducer. */
        private int reducerId;
        private PersonActivitySerializer personActivitySerializer_;
        private PersonActivityGenerator personActivityGenerator_;
        private UpdateEventSerializer updateSerializer_;
        private OutputStream personFactors_;
        private OutputStream activityFactors_;
        private OutputStream friends_;
        private FileSystem fs_;
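
        /**
         * Instantiates the person activity serializer, the optional update stream serializer,
         * the activity generator and the per-reducer factor and friend-list output streams.
         */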
        @Override
        protected void setup(Context context) {
            System.out.println("Setting up reducer for person activity generation");
            Configuration conf = context.getConfiguration();
            reducerId = context.getTaskAttemptID().getTaskID().getId();
            LDBCDatagen.init(conf);
            try {
                personActivitySerializer_ = (PersonActivitySerializer) Class.forName(conf.get("ldbc.snb.datagen.serializer.personActivitySerializer")).newInstance();
                personActivitySerializer_.initialize(conf, reducerId);
                if (DatagenParams.updateStreams) {
                    updateSerializer_ = new UpdateEventSerializer(conf, DatagenParams.hadoopDir + "/temp_updateStream_forum_" + reducerId, reducerId, DatagenParams.numUpdatePartitions);
                }
                personActivityGenerator_ = new PersonActivityGenerator(personActivitySerializer_, updateSerializer_);
                fs_ = FileSystem.get(conf);
                personFactors_ = fs_.create(new Path(DatagenParams.hadoopDir + "/m" + reducerId + DatagenParams.PERSON_COUNTS_FILE));
                activityFactors_ = fs_.create(new Path(DatagenParams.hadoopDir + "/m" + reducerId + DatagenParams.ACTIVITY_FILE));
                friends_ = fs_.create(new Path(DatagenParams.hadoopDir + "/m0friendList" + reducerId + ".csv"));
            } catch (Exception e) {
                System.err.println(e.getMessage());
                e.printStackTrace();
                // Fail fast instead of continuing with a half-initialized reducer.
                throw new RuntimeException(e);
            }
        }
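
        /**
         * Materializes the persons of a block, writes their friend lists (and, if enabled,
         * the knows update events) and then generates the activity for the whole block.
         */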
        @Override
        public void reduce(BlockKey key, Iterable<Person> valueSet, Context context)
                throws IOException, InterruptedException {
            System.out.println("Reducing block " + key.block);
            ArrayList<Person> persons = new ArrayList<Person>();
            for (Person p : valueSet) {
                persons.add(new Person(p));
                StringBuilder strbuf = new StringBuilder();
                strbuf.append(p.accountId());
                for (Knows k : p.knows()) {
                    strbuf.append(",");
                    strbuf.append(k.to().accountId());
                    if (k.creationDate() > Dictionaries.dates.getUpdateThreshold() && DatagenParams.updateStreams) {
                        updateSerializer_.export(p, k);
                    }
                }
                if (DatagenParams.updateStreams) {
                    updateSerializer_.changePartition();
                }
                strbuf.append("\n");
                friends_.write(strbuf.toString().getBytes("UTF-8"));
            }
            System.out.println("Starting generation of block: " + key.block);
            personActivityGenerator_.generateActivityForBlock((int) key.block, persons, context);
            System.out.println("Writing person factors for block: " + key.block);
            personActivityGenerator_.writePersonFactors(personFactors_);
        }
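
        /**
         * Writes the remaining activity factors and closes the serializers and auxiliary streams.
         */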
        @Override
        protected void cleanup(Context context) {
            try {
                System.out.println("Cleaning up");
                personActivityGenerator_.writeActivityFactors(activityFactors_);
                activityFactors_.close();
                personFactors_.close();
                friends_.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            personActivitySerializer_.close();
            if (DatagenParams.updateStreams) {
                updateSerializer_.close();
            }
        }
    }

    private Configuration conf;

    public HadoopPersonActivityGenerator(Configuration conf) {
        this.conf = conf;
    }
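
    /**
     * Ranks the persons of the input file into blocks, runs the MapReduce job that generates
     * and serializes the person activity, and finally removes the temporary files.
     */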
    public void run(String inputFileName) throws Exception {
        FileSystem fs = FileSystem.get(conf);

        System.out.println("RANKING");
        String rankedFileName = conf.get("ldbc.snb.datagen.serializer.hadoopDir") + "/ranked";
        HadoopFileRanker hadoopFileRanker = new HadoopFileRanker(conf, TupleKey.class, Person.class, null);
        hadoopFileRanker.run(inputFileName, rankedFileName);

        System.out.println("GENERATING");
        int numThreads = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numThreads"));
        Job job = Job.getInstance(conf, "Person Activity Generator/Serializer");
        job.setMapOutputKeyClass(BlockKey.class);
        job.setMapOutputValueClass(Person.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Person.class);
        job.setJarByClass(HadoopBlockMapper.class);
        job.setMapperClass(HadoopBlockMapper.class);
        job.setReducerClass(HadoopPersonActivityGeneratorReducer.class);
        job.setNumReduceTasks(numThreads);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
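        // Sort persons by block, group every block into a single reduce() call and
        // route each block to the reducer that owns it.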
        job.setSortComparatorClass(BlockKeyComparator.class);
        job.setGroupingComparatorClass(BlockKeyGroupComparator.class);
        job.setPartitionerClass(HadoopBlockPartitioner.class);

        /** PROFILING OPTIONS **/
        //job.setProfileEnabled(true);
        //job.setProfileParams("-agentlib:hprof=cpu=samples,heap=sites,depth=4,thread=y,format=b,file=%s");
        //job.setProfileTaskRange(true,"0-1");
        //job.setProfileTaskRange(false,"0-1");
        /****/

        FileInputFormat.setInputPaths(job, new Path(rankedFileName));
        FileOutputFormat.setOutputPath(job, new Path(conf.get("ldbc.snb.datagen.serializer.hadoopDir") + "/aux"));

        long start = System.currentTimeMillis();
        if (!job.waitForCompletion(true)) {
            throw new Exception("Person activity generation job failed");
        }
        System.out.println("Real time to generate activity: " + (System.currentTimeMillis() - start) / 1000.0f);

        try {
            fs.delete(new Path(rankedFileName), true);
            fs.delete(new Path(conf.get("ldbc.snb.datagen.serializer.hadoopDir") + "/aux"), true);
        } catch (IOException e) {
            System.err.println(e.getMessage());
            e.printStackTrace();
        }
    }
}