package com.neverwinterdp.scribengin.dataflow;
import org.apache.hadoop.fs.FileSystem;
import com.neverwinterdp.scribengin.ScribenginClient;
import com.neverwinterdp.scribengin.event.ScribenginWaitingEventListener;
import com.neverwinterdp.scribengin.storage.hdfs.HDFSSourceGenerator;
import com.neverwinterdp.scribengin.storage.StorageDescriptor;
import com.neverwinterdp.util.JSONSerializer;
/**
 * Builds and submits the "hello-hdfs-dataflow" dataflow, which reads from
 * {@code <dataDir>/source} and declares two sinks: {@code <dataDir>/sink}
 * (registered as "default") and {@code <dataDir>/invalid-sink} (registered as
 * "invalid"). All three storages are declared with the "HDFS" storage type.
 *
 * <p>The number of workers and the number of executors per worker both default
 * to 3 and can be overridden through the setters before calling
 * {@link #submit()}.
 */
public class HelloHDFSDataflowBuilder {
  /** Storage type used for the source and for every sink descriptor. */
  private static final String STORAGE_TYPE = "HDFS";

  /** Sub-directory of {@code dataDir} that holds the source data. */
  private static final String SOURCE_SUBDIR = "/source";

  /** Sub-directory of {@code dataDir} used by the "default" sink. */
  private static final String DEFAULT_SINK_SUBDIR = "/sink";

  /** Sub-directory of {@code dataDir} used by the "invalid" sink. */
  private static final String INVALID_SINK_SUBDIR = "/invalid-sink";

  // Collaborators are assigned once in the constructor and never rebound.
  private final String dataDir;
  private final FileSystem fs;
  private final ScribenginClient scribenginClient;

  private int numOfWorkers = 3;
  private int numOfExecutorPerWorker = 3;

  /**
   * Creates a builder bound to the given client, file system and base directory.
   *
   * @param scribenginClient client used by {@link #submit()} to submit the dataflow
   * @param fs               file system handed to the source generator in
   *                         {@link #createSource(int, int, int)}
   * @param dataDir          base directory under which the source and sink
   *                         locations are derived
   */
  public HelloHDFSDataflowBuilder(ScribenginClient scribenginClient, FileSystem fs, String dataDir) {
    this.scribenginClient = scribenginClient;
    this.fs = fs;
    this.dataDir = dataDir;
  }

  /**
   * Overrides the number of workers written into the dataflow descriptor.
   *
   * @param numOfWorkers number of workers (default is 3)
   */
  public void setNumOfWorkers(int numOfWorkers) {
    this.numOfWorkers = numOfWorkers;
  }

  /**
   * Overrides the number of executors per worker written into the dataflow descriptor.
   *
   * @param numOfExecutorPerWorker number of executors per worker (default is 3)
   */
  public void setNumOfExecutorPerWorker(int numOfExecutorPerWorker) {
    this.numOfExecutorPerWorker = numOfExecutorPerWorker;
  }

  /**
   * Assembles the dataflow descriptor, prints its JSON form to standard output
   * and submits it through the Scribengin client.
   *
   * @return whatever {@code ScribenginClient.submit} returns for the descriptor
   * @throws Exception if serialization or submission fails
   */
  public ScribenginWaitingEventListener submit() throws Exception {
    DataflowDescriptor dflDescriptor = new DataflowDescriptor();
    dflDescriptor.setName("hello-hdfs-dataflow");
    dflDescriptor.setNumberOfWorkers(numOfWorkers);
    dflDescriptor.setNumberOfExecutorsPerWorker(numOfExecutorPerWorker);
    dflDescriptor.setScribe(TestCopyDataProcessor.class.getName());

    StorageDescriptor sourceDescriptor = new StorageDescriptor(STORAGE_TYPE, sourceLocation());
    dflDescriptor.setSourceDescriptor(sourceDescriptor);

    StorageDescriptor defaultSink = new StorageDescriptor(STORAGE_TYPE, dataDir + DEFAULT_SINK_SUBDIR);
    dflDescriptor.addSinkDescriptor("default", defaultSink);

    StorageDescriptor invalidSink = new StorageDescriptor(STORAGE_TYPE, dataDir + INVALID_SINK_SUBDIR);
    dflDescriptor.addSinkDescriptor("invalid", invalidSink);

    // Dump the descriptor so the submitted configuration is visible on the console.
    System.out.println(JSONSerializer.INSTANCE.toString(dflDescriptor));
    return scribenginClient.submit(dflDescriptor);
  }

  /**
   * Generates the source data under the same location that {@link #submit()}
   * declares as the dataflow source.
   *
   * <p>NOTE(review): the three size parameters are currently not forwarded to
   * the generator, so the amount of generated data is whatever
   * {@code HDFSSourceGenerator.generateSource(fs, path)} produces on its own.
   * Confirm whether the generator offers a way to honor them.
   *
   * @param numOfStream          currently unused
   * @param numOfBuffer          currently unused
   * @param numOfRecordPerBuffer currently unused
   * @throws Exception if the generator fails
   */
  public void createSource(int numOfStream, int numOfBuffer, int numOfRecordPerBuffer) throws Exception {
    HDFSSourceGenerator generator = new HDFSSourceGenerator();
    generator.generateSource(fs, sourceLocation());
  }

  /** Returns the source location shared by {@link #submit()} and the source generator. */
  private String sourceLocation() {
    return dataDir + SOURCE_SUBDIR;
  }
}