package com.neverwinterdp.scribengin.dataflow.test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParametersDelegate;
import com.neverwinterdp.scribengin.Record;
import com.neverwinterdp.scribengin.ScribenginClient;
import com.neverwinterdp.scribengin.client.shell.ScribenginShell;
import com.neverwinterdp.scribengin.dataflow.DataflowDescriptor;
import com.neverwinterdp.scribengin.dataflow.DataflowTaskContext;
import com.neverwinterdp.scribengin.event.ScribenginWaitingEventListener;
import com.neverwinterdp.scribengin.scribe.ScribeAbstract;
import com.neverwinterdp.scribengin.storage.StorageDescriptor;
import com.neverwinterdp.scribengin.storage.hdfs.HDFSSourceGenerator;
import com.neverwinterdp.util.JSONSerializer;
/**
 * Integration test that submits a dataflow which copies records from a
 * locally generated HDFS source into a Kafka sink, waits for the dataflow
 * to finish, then validates the sink contents and emits a test report.
 */
public class HdfsToKafkaDataflowTest extends DataflowTest {
// Validates the records that arrive in the Kafka sink; its CLI parameters
// (e.g. broker/topic settings) are merged into this test's parameters.
@ParametersDelegate
private DataflowSinkValidator sinkValidator = new DataflowKafkaSinkValidator();
// NOTE(review): declared public and non-final for JCommander binding;
// it is not referenced anywhere in this file — presumably read by the
// validator or superclass via reflection. TODO confirm.
@Parameter(names = "--sink-topic", description = "Default sink topic")
public String DEFAULT_SINK_TOPIC = "hello.sink.default" ;
/**
 * Runs the end-to-end test:
 * 1. generates sample source files on the local filesystem,
 * 2. builds and submits a dataflow descriptor (HDFS source -> Kafka sink,
 *    using {@link TestCopyScribe} as the record processor),
 * 3. waits up to {@code duration} ms for the dataflow's lifecycle events,
 * 4. runs the sink validator and prints/reports the results.
 * {@code numOfWorkers}, {@code numOfExecutorPerWorker} and {@code duration}
 * are presumably inherited CLI parameters from DataflowTest — TODO confirm.
 *
 * @param shell shell giving access to the Scribengin client and console
 * @throws Exception on any failure while generating data, submitting the
 *         dataflow, or validating the sink
 */
protected void doRun(ScribenginShell shell) throws Exception {
long start = System.currentTimeMillis();
// Local filesystem stands in for HDFS so the test has no cluster dependency.
FileSystem fs = FileSystem.getLocal(new Configuration());
ScribenginClient scribenginClient = shell.getScribenginClient();
sinkValidator.init(scribenginClient);
// Populate the source directory with generated records before submission.
new HDFSSourceGenerator().generateSource(fs, getDataDir() + "/source");
DataflowDescriptor dflDescriptor = new DataflowDescriptor();
dflDescriptor.setName("hello-hdfs-kafka-dataflow");
dflDescriptor.setNumberOfWorkers(numOfWorkers);
dflDescriptor.setNumberOfExecutorsPerWorker(numOfExecutorPerWorker);
dflDescriptor.setScribe(TestCopyScribe.class.getName());
StorageDescriptor storageDescriptor = new StorageDescriptor("HDFS", getDataDir() + "/source") ;
dflDescriptor.setSourceDescriptor(storageDescriptor);
// "default" is the sink name TestCopyScribe implicitly appends to.
dflDescriptor.addSinkDescriptor("default", sinkValidator.getSinkDescriptor());
// Dump the descriptor for debugging the submitted configuration.
System.out.println(JSONSerializer.INSTANCE.toString(dflDescriptor)) ;
ScribenginWaitingEventListener waitingEventListener = scribenginClient.submit(dflDescriptor);
shell.console().println("Wait time to finish: " + duration + "ms");
// Background thread periodically prints dataflow status while we wait.
Thread dataflowInfoThread = newPrintDataflowThread(shell, dflDescriptor);
dataflowInfoThread.start();
// Blocks until the expected lifecycle events arrive or the timeout elapses.
waitingEventListener.waitForEvents(duration);
shell.console().println("The test executed time: " + (System.currentTimeMillis() - start) + "ms");
// Stop the status-printing thread; no join — it is a daemon-style helper
// whose late output is harmless. TODO confirm it handles interrupt cleanly.
dataflowInfoThread.interrupt();
//sinkValidator.setExpectRecords(sourceGenerator.getNumberOfGeneratedRecords());
sinkValidator.run();
sinkValidator.waitForTermination();
DataflowTestReport report = new DataflowTestReport() ;
//sourceGenerator.populate(report);
sinkValidator.populate(report);
// Human-readable summary to stdout, then the JUnit-style report.
report.report(System.out);
//TODO: Implement and test the junitReport method
junitReport(report);
}
// Root directory (relative to the working dir) for the generated source data.
private String getDataDir() {
return "./build/hdfs";
}
/**
 * Scribe that copies every incoming record to the default sink unchanged,
 * committing once every 100 records to batch the writes.
 */
static public class TestCopyScribe extends ScribeAbstract {
// Records appended since the last commit; reset to 0 on each commit.
private int count = 0;
@Override
public void process(Record record, DataflowTaskContext ctx) throws Exception {
ctx.append(record);
count++ ;
if(count == 100) {
ctx.commit();
count = 0;
// NOTE(review): records after the last full batch of 100 are not
// committed here — presumably the framework commits on task
// completion; otherwise the tail would be lost. TODO confirm.
}
}
}
}