package com.neverwinterdp.scribengin.dataflow.test;

import java.util.Random;

import com.neverwinterdp.scribengin.Record;
import com.neverwinterdp.scribengin.ScribenginClient;
import com.neverwinterdp.scribengin.dataflow.DataflowClient;
import com.neverwinterdp.scribengin.dataflow.DataflowDescriptor;
import com.neverwinterdp.scribengin.dataflow.DataflowTaskContext;
import com.neverwinterdp.scribengin.event.ScribenginWaitingEventListener;
import com.neverwinterdp.scribengin.scribe.ScribeAbstract;
import com.neverwinterdp.scribengin.storage.StorageDescriptor;
import com.neverwinterdp.util.JSONSerializer;

public class HelloHDFSDataflowBuilder {
  private String dataDir;
  private int    numOfWorkers = 3;
  private int    numOfExecutorPerWorker = 3;

  private DataflowClient   dataflowClient;
  private ScribenginClient scribenginClient;

  public HelloHDFSDataflowBuilder(ScribenginClient scribenginClient, String dataDir) {
    this.scribenginClient = scribenginClient;
    this.dataDir = dataDir;
  }

  public void setNumOfWorkers(int numOfWorkers) { this.numOfWorkers = numOfWorkers; }

  public void setNumOfExecutorPerWorker(int numOfExecutorPerWorker) {
    this.numOfExecutorPerWorker = numOfExecutorPerWorker;
  }

  public ScribenginWaitingEventListener submit() throws Exception {
    // Describe the dataflow: name, parallelism, and the scribe class that processes records
    DataflowDescriptor dflDescriptor = new DataflowDescriptor();
    dflDescriptor.setName("hello-hdfs-dataflow");
    dflDescriptor.setNumberOfWorkers(numOfWorkers);
    dflDescriptor.setNumberOfExecutorsPerWorker(numOfExecutorPerWorker);
    dflDescriptor.setScribe(TestCopyScribe.class.getName());

    // Source: records are read from the HDFS source directory
    StorageDescriptor storageDescriptor = new StorageDescriptor("HDFS", dataDir + "/source");
    dflDescriptor.setSourceDescriptor(storageDescriptor);

    // Sinks: a default sink plus an "invalid" sink for rejected records
    StorageDescriptor defaultSink = new StorageDescriptor("HDFS", dataDir + "/sink");
    dflDescriptor.addSinkDescriptor("default", defaultSink);

    StorageDescriptor invalidSink = new StorageDescriptor("HDFS", dataDir + "/invalid-sink");
    dflDescriptor.addSinkDescriptor("invalid", invalidSink);

    System.out.println(JSONSerializer.INSTANCE.toString(dflDescriptor));
    return scribenginClient.submit(dflDescriptor);
  }

  static public class TestCopyScribe extends ScribeAbstract {
    private int    count  = 0;
    private Random random = new Random();

    @Override
    public void process(Record record, DataflowTaskContext ctx) throws Exception {
      if (random.nextDouble() < 0.8) {
        // ~80% of records go to the default sink
        ctx.append(record);
        //System.out.println("Write default");
      } else {
        // the rest are routed to the "invalid" sink
        ctx.write("invalid", record);
        //System.out.println("Write invalid");
      }
      count++;
      // Commit after every 100 processed records
      if (count == 100) {
        ctx.commit();
        count = 0;
      }
    }
  }
}
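
// A minimal usage sketch (hypothetical: the client instance and the data
// directory shown below are assumptions, not part of this class). It only
// uses methods defined above: configure the builder, then call submit() to
// launch the dataflow and obtain a ScribenginWaitingEventListener for its
// lifecycle events.
//
//   ScribenginClient client = ...;  // obtained from a running Scribengin cluster
//   HelloHDFSDataflowBuilder builder =
//       new HelloHDFSDataflowBuilder(client, "/user/scribengin/hello-hdfs");
//   builder.setNumOfWorkers(2);
//   builder.setNumOfExecutorPerWorker(2);
//   ScribenginWaitingEventListener listener = builder.submit();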