package com.neverwinterdp.scribengin.dataflow.test;

import java.util.concurrent.atomic.AtomicLong;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.neverwinterdp.kafka.tool.KafkaMessageSendTool;
import com.neverwinterdp.kafka.tool.KafkaTool;
import com.neverwinterdp.registry.RegistryConfig;
import com.neverwinterdp.scribengin.Record;
import com.neverwinterdp.scribengin.ScribenginClient;
import com.neverwinterdp.scribengin.client.shell.ScribenginShell;
import com.neverwinterdp.scribengin.dataflow.DataflowDescriptor;
import com.neverwinterdp.scribengin.dataflow.test.HelloKafkaDataflowBuilder.TestCopyScribe;
import com.neverwinterdp.scribengin.event.ScribenginWaitingEventListener;
import com.neverwinterdp.scribengin.storage.StorageDescriptor;
import com.neverwinterdp.scribengin.storage.s3.S3Client;
import com.neverwinterdp.tool.message.MessageGenerator;
import com.neverwinterdp.util.JSONSerializer;

/**
 * Dataflow test that pumps generated records into a Kafka source topic and
 * copies them into two S3 sinks ("default" and "invalid").
 */
public class KafkaToS3DataflowTest extends DataflowTest {
  @Parameter(names = "--source-topic", description = "Source topic")
  public String SOURCE_TOPIC = "hello.source";

  @Parameter(names = "--flow-name", description = "The dataflow name")
  private String name = "hello";

  @Parameter(names = "--kafka-write-period", description = "The write period for each partition in ms")
  private int writePeriod = 10;

  @Parameter(names = "--kafka-num-partition", description = "Number of partitions")
  private int numPartitions = 5;

  @Parameter(names = "--kafka-max-message-per-partition", description = "Max number of messages per partition")
  private int maxMessagePerPartition = 100;

  static public String BUCKET_NAME          = "nellouze";
  static public String DEFAULT_STORAGE_PATH = "default";
  static public String INVALID_STORAGE_PATH = "invalid";
  static public String REGION_NAME          = "eu-central-1";

  protected void doRun(ScribenginShell shell) throws Exception {
    // Recreate the S3 bucket and pre-create one folder per stream for both sinks
    S3Client s3Client = new S3Client(REGION_NAME);
    s3Client.onInit();
    if (s3Client.hasBucket(BUCKET_NAME)) {
      s3Client.deleteBucket(BUCKET_NAME, true);
    }
    s3Client.createBucket(BUCKET_NAME);
    s3Client.createS3Folder(BUCKET_NAME, DEFAULT_STORAGE_PATH);
    for (int i = 0; i < 10; i++) {
      s3Client.createS3Folder(BUCKET_NAME, DEFAULT_STORAGE_PATH + "/stream-" + i);
    }
    s3Client.createS3Folder(BUCKET_NAME, INVALID_STORAGE_PATH);
    for (int i = 0; i < 10; i++) {
      s3Client.createS3Folder(BUCKET_NAME, INVALID_STORAGE_PATH + "/stream-" + i);
    }

    long start = System.currentTimeMillis();
    ScribenginClient scribenginClient = shell.getScribenginClient();
    RegistryConfig registryConfig = scribenginClient.getRegistry().getRegistryConfig();
    String zkConnect = registryConfig.getConnect();

    // Run a daemon that feeds generated records into the Kafka source topic
    String[] sendArgs = {
      "--topic",                  SOURCE_TOPIC,
      "--send-period",            Integer.toString(writePeriod),
      "--num-partition",          Integer.toString(numPartitions),
      "--send-max-per-partition", Integer.toString(maxMessagePerPartition),
      "--send-max-duration",      Long.toString(duration),
      "--zk-connect",             zkConnect
    };
    KafkaMessageSendTool sendTool = new KafkaMessageSendTool();
    new JCommander(sendTool, sendArgs);
    sendTool.setMessageGenerator(new KafkaMessageGeneratorRecord());
    sendTool.runAsDeamon();

    // Resolve the Kafka broker list from ZooKeeper
    KafkaTool client = new KafkaTool(name, zkConnect);
    client.connect();
    String brokerList = client.getKafkaBrokerList();
    client.close();

    DataflowDescriptor dflDescriptor = new DataflowDescriptor();
    dflDescriptor.setName("hello-kafka-dataflow");
    dflDescriptor.setNumberOfWorkers(numOfWorkers);
    dflDescriptor.setTaskMaxExecuteTime(taskMaxExecuteTime);
    dflDescriptor.setNumberOfExecutorsPerWorker(numOfExecutorPerWorker);
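    // Wire the copy scribe, the Kafka source, and the two S3 sinks into the descriptor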
    dflDescriptor.setScribe(TestCopyScribe.class.getName());

    StorageDescriptor storageDescriptor = new StorageDescriptor("KAFKA");
    storageDescriptor.attribute("name", name);
    storageDescriptor.attribute("topic", SOURCE_TOPIC);
    storageDescriptor.attribute("zk.connect", zkConnect);
    storageDescriptor.attribute("broker.list", brokerList);
    dflDescriptor.setSourceDescriptor(storageDescriptor);

    StorageDescriptor defaultSink = new StorageDescriptor("s3");
    defaultSink.attribute("s3.bucket.name", BUCKET_NAME);
    defaultSink.attribute("s3.region.name", REGION_NAME);
    defaultSink.attribute("s3.storage.path", DEFAULT_STORAGE_PATH);
    dflDescriptor.addSinkDescriptor("default", defaultSink);

    StorageDescriptor invalidSink = new StorageDescriptor("s3");
    invalidSink.attribute("s3.bucket.name", BUCKET_NAME);
    invalidSink.attribute("s3.region.name", REGION_NAME);
    invalidSink.attribute("s3.storage.path", INVALID_STORAGE_PATH);
    dflDescriptor.addSinkDescriptor("invalid", invalidSink);

    // Submit the dataflow and wait for it to run to completion
    ScribenginWaitingEventListener waitingEventListener = scribenginClient.submit(dflDescriptor);
    shell.console().println("Wait time to finish: " + duration + "ms");
    Thread dataflowInfoThread = newPrintDataflowThread(shell, dflDescriptor);
    dataflowInfoThread.start();
    waitingEventListener.waitForEvents(duration);
    shell.console().println("The test execution time: " + (System.currentTimeMillis() - start) + "ms");
    dataflowInfoThread.interrupt();
  }

  /**
   * Generates JSON-serialized Record messages keyed by partition and a
   * monotonically increasing id, so lost or duplicated messages can be detected.
   */
  static public class KafkaMessageGeneratorRecord implements MessageGenerator {
    static public AtomicLong idTracker = new AtomicLong();

    public byte[] nextMessage(int partition, int messageSize) {
      String key = "partition=" + partition + ",id=" + idTracker.getAndIncrement();
      return JSONSerializer.INSTANCE.toString(new Record(key, new byte[messageSize])).getBytes();
    }
  }
}