package com.neverwinterdp.scribengin.dataflow.test;

import java.util.Random;

import com.beust.jcommander.ParametersDelegate;
import com.neverwinterdp.scribengin.Record;
import com.neverwinterdp.scribengin.ScribenginClient;
import com.neverwinterdp.scribengin.client.shell.ScribenginShell;
import com.neverwinterdp.scribengin.dataflow.DataflowDescriptor;
import com.neverwinterdp.scribengin.dataflow.DataflowTaskContext;
import com.neverwinterdp.scribengin.event.ScribenginWaitingEventListener;
import com.neverwinterdp.scribengin.scribe.ScribeAbstract;
import com.neverwinterdp.scribengin.storage.StorageDescriptor;
import com.neverwinterdp.util.JSONSerializer;

public class KafkaToHdfsDataflowTest extends DataflowTest {
  @ParametersDelegate
  private DataflowSourceGenerator sourceGenerator = new DataflowKafkaSourceGenerator();

  protected void doRun(ScribenginShell shell) throws Exception {
    long start = System.currentTimeMillis();
    ScribenginClient scribenginClient = shell.getScribenginClient();

    // Start feeding records into the Kafka source in the background.
    sourceGenerator.init(scribenginClient);
    sourceGenerator.runInBackground();

    // Describe the dataflow: the Kafka source, the copy scribe, and two HDFS sinks.
    DataflowDescriptor dflDescriptor = new DataflowDescriptor();
    dflDescriptor.setName("hello-kafka-hdfs-dataflow");
    dflDescriptor.setNumberOfWorkers(numOfWorkers);
    dflDescriptor.setTaskMaxExecuteTime(taskMaxExecuteTime);
    dflDescriptor.setNumberOfExecutorsPerWorker(numOfExecutorPerWorker);
    dflDescriptor.setScribe(TestCopyScribe.class.getName());
    dflDescriptor.setSourceDescriptor(sourceGenerator.getSourceDescriptor());

    StorageDescriptor defaultSink = new StorageDescriptor("HDFS", getDataDir() + "/sink");
    dflDescriptor.addSinkDescriptor("default", defaultSink);

    StorageDescriptor invalidSink = new StorageDescriptor("HDFS", getDataDir() + "/invalid-sink");
    dflDescriptor.addSinkDescriptor("invalid", invalidSink);

    System.out.println(JSONSerializer.INSTANCE.toString(dflDescriptor));

    // Submit the dataflow, then wait for it to finish while a background
    // thread periodically prints the dataflow status.
    ScribenginWaitingEventListener waitingEventListener = scribenginClient.submit(dflDescriptor);
    shell.console().println("Wait time to finish: " + duration + "ms");

    Thread dataflowInfoThread = newPrintDataflowThread(shell, dflDescriptor);
    dataflowInfoThread.start();

    waitingEventListener.waitForEvents(duration);
    shell.console().println("Test execution time: " + (System.currentTimeMillis() - start) + "ms");
    dataflowInfoThread.interrupt();

    // Collect the source generator's statistics and print the test report.
    DataflowTestReport report = new DataflowTestReport();
    sourceGenerator.populate(report);
    report.report(System.out);
    //TODO: Implement and test the junitReport method
    junitReport(report);
  }

  private String getDataDir() {
    return "./build/hdfs";
  }

  static public class TestCopyScribe extends ScribeAbstract {
    private int count = 0;
    private Random random = new Random();

    @Override
    public void process(Record record, DataflowTaskContext ctx) throws Exception {
      // Route roughly 80% of the records to the default sink
      // and the remaining ~20% to the "invalid" sink.
      if (random.nextDouble() < 0.8) {
        ctx.append(record);
        System.out.println("Write default");
      } else {
        ctx.write("invalid", record);
        System.out.println("Write invalid");
      }
      count++;
      // Commit in batches of 100 records to bound uncommitted state.
      if (count == 100) {
        ctx.commit();
        count = 0;
      }
    }
  }
}
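
/* A minimal driver sketch, kept as a comment so the file stays compilable.
 * It assumes the test is launched via JCommander (implied by the
 * @ParametersDelegate annotation above) and that DataflowTest exposes an
 * entry point that eventually calls doRun(shell); the exact entry-point
 * name and the way the shell is obtained are assumptions, not confirmed
 * by this file:
 *
 *   KafkaToHdfsDataflowTest test = new KafkaToHdfsDataflowTest();
 *   new JCommander(test).parse(args);   // binds CLI args to the delegated
 *                                       // DataflowKafkaSourceGenerator parameters
 *   ScribenginShell shell = ...;        // shell connected to a running cluster
 *   test.doRun(shell);                  // hypothetical: actual runner may differ
 */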