/* This file is part of VoltDB. * Copyright (C) 2008-2017 VoltDB Inc. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ /* * Write n rows of random data (per schema) to m topics, one thread per topic. * * Lots of options to control number of topics, number of rows, cycles, etc. * See options below rather than depend on a soon to be out of date comment. */ package client.kafkaimporter; import java.util.ArrayList; import java.util.List; import java.util.Properties; import java.util.Random; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.common.serialization.StringSerializer; import org.json_voltpatches.JSONObject; import org.voltcore.logging.VoltLogger; import org.voltdb.CLIConfig; import com.google_voltpatches.common.util.concurrent.RateLimiter; public class Producer extends Thread { static VoltLogger log = new VoltLogger("KafkaImporter.Producer"); KafkaProducer<String,String> m_producer; String m_topic; String m_servers; long m_rate; int m_cycletime; int m_pausetime; long m_rows; long m_cycles; long m_rangemin; long m_rangemax; String m_compression; boolean m_producerrunning = false; JSONObject m_json_obj; // Validated CLI config KafkaProducerConfig config; public Producer(KafkaProducerConfig config, int topicnum) { // TODO: add topic check/create, with appropriate replication & partitioning // Meanwhile topic creation is done in kafkautils.py from the runapp.py // or if auto-create is enabled in the Kafka cluster properties this.config = config; m_topic = config.topic; m_servers = config.brokers; m_rate = config.producerrate; m_cycletime = config.cycletime; // if (topicnum % 2 == 0) // alternate compression strategies, if any // m_compression = config.compression; // else // m_compression = "none"; if (config.compression.equals("all")) m_compression = KafkaProducerConfig.compression_types.split(" ")[topicnum%4]; else m_compression = config.compression; log.info("Topic " + topicnum + " compression: " + m_compression); m_pausetime = (int) (config.pausetime * Math.random()); // let each thread have its own wait time between 0 and pausetime m_rows = config.totalrows; long possiblecycles = m_rows / (m_rate * m_cycletime); m_cycles = (possiblecycles > config.cycles) ? possiblecycles : config.cycles; m_rangemin = m_rows * topicnum; // offset the start so keys don't overlap and cause constraint violations // create distinct topic name <m_topic><topicnum> m_topic = m_topic + topicnum; m_json_obj = new JSONObject(); Properties props = new Properties(); props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, m_servers); props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, m_compression); // compression.type props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); props.put(ProducerConfig.ACKS_CONFIG, "0"); m_producer = new KafkaProducer<String,String>(props); log.info("Instantiate Producer: " + m_topic + ", " + m_servers + ", " + m_rate + ", " + m_cycletime + ", " + m_pausetime + ", " + m_rows); } @Override public void run() { Random rand = new Random(); Long rowCnt = new Long(m_rangemin); // starting value for key for each topic producer final RateLimiter rateLimiter = RateLimiter.create(m_rate); m_producerrunning = true; for (int cycle = 0; cycle < m_cycles; cycle++) { log.info("Kafka producer: starting cycle " + cycle + " to produce " + (m_rows/m_cycles) + " rows at row index " + rowCnt + "."); for (long rowsincycle = m_rangemin; rowsincycle < (m_rows/m_cycles+m_rangemin); rowsincycle++) { Long value = System.currentTimeMillis(); rateLimiter.acquire(); SampleRecord record = new SampleRecord(rowCnt, 1000, rand); ProducerRecord<String, String> producerRecord = new ProducerRecord<String, String>(m_topic, rowCnt.toString(), record.obj.toString()); // log.info("JSON Row: " + producerRecord.toString()); m_producer.send(producerRecord); rowCnt++; } try { log.info("...Starting pause between cycles -- " + m_pausetime + " seconds."); m_producerrunning = false; Thread.sleep(m_pausetime*1000); m_producerrunning = true; } catch (InterruptedException e) { e.printStackTrace(); } } m_producerrunning = false; } void shutdown() { m_producer.close(); } boolean is_ProducerRunning() { return m_producerrunning; } /** * Uses included {@link CLIConfig} class to * declaratively state command line options with defaults * and validation. */ static class KafkaProducerConfig extends CLIConfig { static final String compression_types = "none gzip snappy lz4"; @Option(desc = "Kafka topic name <topicbase><number>") String topic = "TOPIC"; @Option(desc = "Kafka broker list, server:port,...") String brokers = ""; @Option(desc = "Number of Kafka topics.") int ntopics = 1; // 1 means 1 topic: TOPIC0, for example @Option(desc = "Rate in rows per second") int producerrate = 1_000_000; @Option(desc = "Cycle Time in seconds.") int cycletime = 60; @Option (desc = "Pause time in seconds.") int pausetime = 10; @Option(desc = "Total rows in rows.") int totalrows = 6_000_000; @Option(desc = "Number of producer cycles") int cycles = 5; @Option(desc = "Compression codec: none, gzip, snappy, lz4 or all to cycle through choices.") String compression = "all"; @Override public void validate() { if (ntopics == 0) ntopics = 1; if (topic.length() <= 0) exitWithMessageAndUsage("Topic name required"); if (brokers.length() < 0) exitWithMessageAndUsage("Broker list required"); if (producerrate <= 0) exitWithMessageAndUsage("Producer rate must be > 0"); if (cycletime <= 0) exitWithMessageAndUsage("Cycle time must be > 0"); if (pausetime <= 0) exitWithMessageAndUsage("Pause time must be > 0"); if (totalrows <= 0) exitWithMessageAndUsage("Total rows must be > 0"); if (cycles <= 0) exitWithMessageAndUsage("Cycle count must be > 0"); if (! compression_types.contains(compression) && !compression.equals("all")) exitWithMessageAndUsage("Compression value unknown"); } } public static void main(String[] args) { KafkaProducerConfig config = new KafkaProducerConfig(); config.parse(Producer.class.getName(), args); System.out.println(config.getConfigDumpString()); List<Producer> producers = new ArrayList<Producer>(); for (int topic = 0; topic < config.ntopics; topic++) { Producer producer = new Producer(config, topic); producer.start(); producers.add(producer); } try { int t = 0; for (Producer p : producers) { p.join(); System.out.println("Thread " + t + " done."); t++; } } catch (InterruptedException e) { e.printStackTrace(); } System.out.println("All threads done."); } }