/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * This program exercises the Kafka import capability by inserting
 * <key, value> pairs into both a VoltDB table -- KAFKAMIRRORTABLE1 -- and
 * an export table -- KAFKAEXPORTTABLE1. The export table links to a
 * topic in a Kafka cluster. The deployment file also adds an import from
 * that same cluster. The checking proceeds in parallel, comparing the
 * rows in KAFKAIMPORTTABLE1 with the rows in the mirror table.
 * Matching rows are deleted from both tables. Separate threads check
 * statistics on both the export table and the import table to determine
 * when both import and export activity have quiesced. At the end of a
 * successful run, both the import table and the mirror table are empty.
 * If there are rows left in the mirror table, then not all exported
 * rows have made the round trip back to the import table, or there might
 * be data corruption causing the match process to fail.
 *
 * 8/31: add option to populate the topic externally, called "pounder" since
 * pounder (pounder.groovy) is a simple script commonly used to populate
 * a topic quickly.
 */
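/*
 * For orientation, the round trip described above is wired up in the VoltDB
 * deployment file. The fragment below is an illustrative sketch only: the
 * broker address, topic, and target names are assumptions, and the exact
 * property names vary by VoltDB version -- consult the deployment file that
 * ships with this test for the authoritative configuration.
 *
 *   <export>
 *     <configuration enabled="true" type="kafka" target="default">
 *       <property name="bootstrap.servers">kafkahost:9092</property>
 *     </configuration>
 *   </export>
 *   <import>
 *     <configuration enabled="true" type="kafka" format="csv">
 *       <property name="brokers">kafkahost:9092</property>
 *       <property name="topic">volt_kafka_topic</property>
 *       <property name="procedure">KAFKAIMPORTTABLE1.insert</property>
 *     </configuration>
 *   </import>
 */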
package client.kafkaimporter;

import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicLong;

import org.voltcore.logging.VoltLogger;
import org.voltdb.CLIConfig;
import org.voltdb.VoltType;
import org.voltdb.client.Client;
import org.voltdb.client.ClientConfig;
import org.voltdb.client.ClientFactory;
import org.voltdb.client.ClientStats;
import org.voltdb.client.ClientStatsContext;
import org.voltdb.client.NoConnectionsException;

import com.google_voltpatches.common.base.Splitter;
import com.google_voltpatches.common.net.HostAndPort;

public class KafkaImportBenchmark {

    static VoltLogger log = new VoltLogger("Benchmark");

    // handy, rather than typing this out several times
    static final String HORIZONTAL_RULE =
            "----------" + "----------" + "----------" + "----------" +
            "----------" + "----------" + "----------" + "----------";

    // indexes into the array returned by MatchChecks.getImportValues()
    static final int SUCCESSES = 0;
    static final int FAILURES = 1;
    static final int OUTSTANDING_REQUESTS = 2;
    static final int RETRIES = 3;

    // Statistics manager objects from the client
    static ClientStatsContext periodicStatsContext;

    // validated command line configuration
    static Config config;
    // Timers for periodic stats printing and import progress checks
    static Timer statsTimer;
    static Timer checkTimer;
    // Benchmark start time
    long benchmarkStartTS;

    final ClientStatsContext fullStatsContext;

    static final Map<HostAndPort, OutputStream> haplist = new HashMap<HostAndPort, OutputStream>();
    static Client client;

    // Some thread-safe counters for reporting
    AtomicLong linesRead = new AtomicLong(0);                        // count of rows successfully exported
    static AtomicLong rowsAdded = new AtomicLong(0);                 // count of rows queued to export
    static final AtomicLong finalInsertCount = new AtomicLong(0);

    private static final int END_WAIT = 10;   // wait at the end for import to settle after export completes
    private static final int PAUSE_WAIT = 10; // wait for server to resume from pause mode
    private static final String RUNNING_STATE = "Running";

    static List<Integer> importProgress = new ArrayList<Integer>();

    static InsertExport exportProc;
    static TableChangeMonitor exportMon;
    static TableChangeMonitor importMon;
    static MatchChecks matchChecks;
    /**
     * Uses the included {@link CLIConfig} class to
     * declaratively state command line options with defaults
     * and validation.
     */
    static class Config extends CLIConfig {
        @Option(desc = "Interval for performance feedback, in seconds.")
        long displayinterval = 5;

        @Option(desc = "Benchmark duration, in seconds.")
        int duration = 300;

        @Option(desc = "Maximum export TPS rate for benchmark.")
        int ratelimit = Integer.MAX_VALUE;

        @Option(desc = "Comma-separated list of the form server[:port] to connect to for database queries.")
        String servers = "localhost";

        @Option(desc = "Number of rows to expect to import from the Kafka topic.")
        long expected_rows = 10_000_000;

        @Option(desc = "Report latency for kafka benchmark run.")
        boolean latencyreport = false;

        @Option(desc = "Test using all VoltDB datatypes (except varbin).")
        boolean alltypes = false;

        @Option(desc = "Set to true to use voltdb export instead of groovy loader to populate kafka topic(s).")
        boolean useexport = false;

        @Option(desc = "Filename to write raw summary statistics to.")
        String statsfile = "";

        @Option(desc = "Are we running the multi-stream/multi-topic test?")
        boolean streamtest = false;

        @Option(desc = "Number of streams and topics we're importing.")
        int streams = 1;

        @Override
        public void validate() {
            if (duration <= 0) exitWithMessageAndUsage("duration must be > 0");
            if (ratelimit <= 0) exitWithMessageAndUsage("ratelimit must be > 0");
            // 0 means we're not expecting any rows -- part of the new offset checking test
            // if (expected_rows <= 0) exitWithMessageAndUsage("row number must be > 0");
            if (!useexport && alltypes) exitWithMessageAndUsage("groovy loader and alltypes are mutually exclusive");
            if (displayinterval <= 0) exitWithMessageAndUsage("displayinterval must be > 0");
            log.info("finished validating args");
        }
    }

    /**
     * Constructor for benchmark instance.
     * Configures VoltDB client and prints configuration.
     *
     * @param config Parsed & validated CLI options.
     */
    public KafkaImportBenchmark(Config config) {
        KafkaImportBenchmark.config = config;
        periodicStatsContext = client.createStatsContext();
        fullStatsContext = client.createStatsContext();

        log.info(HORIZONTAL_RULE);
        log.info(" Command Line Configuration");
        log.info(HORIZONTAL_RULE);
        log.info(config.getConfigDumpString());
        if (config.latencyreport) {
            log.warn("Option latencyreport is ON for async run, please set a reasonable ratelimit.\n");
        }
    }

    /**
     * Connect to one or more VoltDB servers.
     *
     * @param servers A comma-separated list of servers using the hostname:port
     * syntax (where :port is optional). Assumes port 21212 if not specified otherwise.
     * @throws Exception if the client cannot be created or a connection fails.
     */
    static void dbconnect(String servers, int ratelimit) throws Exception {
        final Splitter COMMA_SPLITTER = Splitter.on(",").omitEmptyStrings().trimResults();

        log.info("Connecting to VoltDB Interface...");
        ClientConfig clientConfig = new ClientConfig();
        clientConfig.setMaxTransactionsPerSecond(ratelimit);
        clientConfig.setReconnectOnConnectionLoss(true);
        client = ClientFactory.createClient(clientConfig);

        for (String server : COMMA_SPLITTER.split(servers)) {
            log.info("..." + server);
            client.createConnection(server);
        }
    }
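    // Illustrative invocation only: the flags below map one-to-one to the
    // @Option fields in Config above, but the classpath and host names are
    // assumptions -- the actual test harness supplies its own wrapper script.
    //
    //   java -classpath voltdb-client.jar:. client.kafkaimporter.KafkaImportBenchmark \
    //        --servers=volt1,volt2 --duration=300 --useexport=true \
    //        --expected_rows=10000000 --statsfile=kafkaimport.csv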
    /**
     * Creates a Timer task to display performance data.
     * It calls printStatistics() every displayinterval seconds.
     */
    public static void schedulePeriodicStats() {
        statsTimer = new Timer("periodicStats", true);
        TimerTask statsPrinting = new TimerTask() {
            @Override
            public void run() { printStatistics(); }
        };
        statsTimer.scheduleAtFixedRate(statsPrinting,
                config.displayinterval * 1000,
                config.displayinterval * 1000);
    }

    /**
     * Prints a one-line update on performance that can be printed
     * periodically during a benchmark.
     */
    public static synchronized void printStatistics() {
        try {
            ClientStats stats = periodicStatsContext.fetchAndResetBaseline().getStats();
            long thrup = stats.getTxnThroughput();
            long rows = MatchChecks.getExportRowCount(client);
            if (rows == VoltType.NULL_BIGINT) rows = 0;
            log.info("Importer stats: " + MatchChecks.getImportStats(client));
            log.info(String.format("Export Throughput %d/s, Total Rows %d, Aborts/Failures %d/%d, Avg/95%% Latency %.2f/%.2fms",
                    thrup, rows, stats.getInvocationAborts(), stats.getInvocationErrors(),
                    stats.getAverageLatency(), stats.kPercentileLatencyAsDouble(0.95)));
        } catch (Exception ex) {
            log.error("Exception in printStatistics", ex);
        }
    }

    protected static void scheduleCheckTimer() {
        final Timer timer = new Timer("checkTimer", true);
        final long period = config.displayinterval;

        timer.scheduleAtFixedRate(new TimerTask() {
            @Override
            public void run() {
                long count = 0;
                if (!config.useexport) {
                    count = MatchChecks.getImportTableRowCount(config.alltypes, client); // imported count
                } else {
                    count = MatchChecks.getImportRowCount(client); // deleted count
                }
                importProgress.add((int) count);

                if (config.alltypes) {
                    // for alltypes, if a column in the mirror table doesn't match the
                    // import table, the returned key will be the mismatched row's key, non-zero
                    long key = MatchChecks.checkRowMismatch(client);
                    if (key != 0) {
                        log.error("Import value mismatch at row " + key + ". Exiting.");
                        System.exit(-1);
                    }
                }
                int sz = importProgress.size();
                if (sz > 1) {
                    log.info("Import Throughput " + (count - importProgress.get(sz - 2)) / period +
                            "/s, Total Rows: " + count);
                }
                log.info("Import stats: " + MatchChecks.getImportStats(client));
            }
        }, config.displayinterval * 1000, config.displayinterval * 1000);
    }
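    // How quiescence is detected: the checkTimer task above appends a row-count
    // sample to importProgress every displayinterval seconds, and main() keeps
    // waiting until the newest sample is no larger than each of the previous
    // three. A minimal sketch of that predicate, assuming at least four samples
    // have been collected:
    //
    //   int sz = importProgress.size();
    //   boolean settled = sz >= 4
    //           && importProgress.get(sz - 1) <= importProgress.get(sz - 2)
    //           && importProgress.get(sz - 1) <= importProgress.get(sz - 3)
    //           && importProgress.get(sz - 1) <= importProgress.get(sz - 4);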
    /**
     * Core benchmark code.
     * Connect. Initialize. Run the loop. Cleanup. Print Results.
     *
     * @throws Exception if anything unexpected happens.
     */
    public void runBenchmark() throws Exception {
        log.info(HORIZONTAL_RULE);
        log.info(" Setup & Initialization");
        log.info(HORIZONTAL_RULE);

        log.info(HORIZONTAL_RULE);
        log.info("Starting Benchmark");
        log.info(HORIZONTAL_RULE);

        long icnt = 0;
        try {
            // print periodic statistics to the console
            schedulePeriodicStats();
            scheduleCheckTimer();

            // Run the benchmark loop for the requested duration.
            // The throughput may be throttled depending on client configuration.
            // Save the key/value pairs so they can be verified through the database.
            log.info("Running benchmark...");
            final long benchmarkEndTime = System.currentTimeMillis() + (1000L * config.duration);
            while (benchmarkEndTime > System.currentTimeMillis()) {
                long value = System.currentTimeMillis();
                long key = icnt;
                exportProc.insertExport(key, value);
                icnt++;
            }
        } catch (Exception ex) {
            log.error("Exception in Benchmark", ex);
        } finally {
            log.info("Benchmark ended, exported " + icnt + " rows.");
            // cancel periodic stats printing
            statsTimer.cancel();
            finalInsertCount.addAndGet(icnt);
        }
    }

    public static class BenchmarkRunner extends Thread {
        private final KafkaImportBenchmark benchmark;

        public BenchmarkRunner(KafkaImportBenchmark bm) {
            benchmark = bm;
        }

        @Override
        public void run() {
            try {
                if (config.useexport) {
                    schedulePeriodicStats();
                    benchmark.runBenchmark();
                }
                scheduleCheckTimer();
            } catch (Exception ex) {
                log.error("Exception in benchmark", ex);
                System.exit(-1);
            }
        }
    }

    public static boolean verifyZero() {
        long count = -1;
        log.info("Checking for zero rows in KAFKAIMPORTTABLE1");
        log.info("Waiting 1 minute for import to settle");
        try {
            Thread.sleep(1 * 1000 * 60); // wait for 1 minute
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        try {
            count = MatchChecks.getImportTableRowCount(false, client); // imported count
        } catch (Exception e) {
            e.printStackTrace();
        }
        return count == 0;
    }

    public static void endTest(boolean testResult, Config config) {
        // Write stats to file if requested
        try {
            if ((config.statsfile != null) && (config.statsfile.length() != 0)) {
                log.info("Stats file: " + config.statsfile);
                FileWriter fw = new FileWriter(config.statsfile);

                // stats: row count, latest time, earliest time
                long[] stats = MatchChecks.getStats(client);
                log.info("rows: " + stats[0] + ". End timestamp: " + stats[1] + ". Start timestamp: " + stats[2]);
                // Date date = new Date(stats[2]);
                // LocalDateTime.ofInstant(Instant.ofEpochMilli(stats[2]*1000), ZoneId.systemDefault());
                double tps = stats[0] / ((double) stats[1] - (double) stats[2]);
                log.info("TPS: " + tps);
                log.info("Stats string: " + String.format("%d,%d,%d,%d,%d,%d,%d,0,0,0,0,0,0\n",
                        stats[2], config.duration, 0, 0, 0, 0, (long) tps));
                fw.append(String.format("%d,%d,%d,%d,%d,%d,%d,0,0,0,0,0,0\n",
                        stats[2],
                        (stats[1] - stats[2]) * 1000,
                        stats[0],
                        0, 0, 0, 0));
                fw.close();
            }
        } catch (IOException e) {
            System.err.println("Error writing stats file");
            e.printStackTrace();
        }

        try {
            client.drain();
            client.close();
        } catch (NoConnectionsException | InterruptedException e) {
            e.printStackTrace();
        }

        if (testResult) {
            log.info("Test passed!");
            System.exit(0);
        } else {
            log.info("Test failed!");
            System.exit(1);
        }
    }
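    // The line appended to statsfile above is a 13-column CSV row. Reading it
    // off the format call, the populated columns are: start timestamp
    // (stats[2]), duration scaled to milliseconds ((stats[1]-stats[2])*1000),
    // and row count (stats[0]); the remaining columns are zero. The layout is
    // assumed to match whatever downstream harness parses these files; note
    // that the "Stats string" logged just before it fills the columns
    // differently (duration and TPS instead of duration and row count).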
    /**
     * Main routine creates a benchmark instance and kicks off the run method.
     *
     * @param args Command line arguments.
     * @throws Exception if anything goes wrong.
     */
    public static void main(String[] args) throws Exception {
        VoltLogger log = new VoltLogger("Benchmark.main");
        boolean testResult = true;

        // create a configuration from the arguments
        Config config = new Config();
        config.parse(KafkaImportBenchmark.class.getName(), args);

        // connect to one or more servers for database queries
        dbconnect(config.servers, config.ratelimit);

        // special case for the second half of the offset check test:
        // we expect no rows, and give the import subsystem about a
        // minute to settle
        if (config.expected_rows == 0) {
            testResult = verifyZero();
            endTest(testResult, config);
        }

        // instance handles inserts to the Kafka export table and its mirror DB table
        exportProc = new InsertExport(config.alltypes, client, rowsAdded);

        log.info("Starting KafkaImportBenchmark...");
        KafkaImportBenchmark benchmark = new KafkaImportBenchmark(config);
        BenchmarkRunner runner = new BenchmarkRunner(benchmark);
        runner.start();
        runner.join(); // writers are done

        long exportRowCount = 0;
        if (config.useexport) {
            exportRowCount = MatchChecks.getExportRowCount(client);
            log.info("Export phase complete, " + exportRowCount + " rows exported, waiting for import to drain...");
        }

        // final check: wait until the import and export tables have quiesced,
        // then check that the mirror table is empty. If not, that indicates that
        // not all the rows got to Kafka or not all the rows got imported back.
        // importProgress is a list of sampled import row counts showing import
        // progress; samples are recorded by the checkTimer task.
        do {
            Thread.sleep(END_WAIT * 1000);
        } while (!RUNNING_STATE.equalsIgnoreCase(MatchChecks.getClusterState(client)) ||
                importProgress.size() < 4 ||
                importProgress.get(importProgress.size() - 1) > importProgress.get(importProgress.size() - 2) ||
                importProgress.get(importProgress.size() - 1) > importProgress.get(importProgress.size() - 3) ||
                importProgress.get(importProgress.size() - 1) > importProgress.get(importProgress.size() - 4));

        long[] importStatValues = MatchChecks.getImportValues(client);
        long mirrorRows = 0;
        if (!config.streamtest) mirrorRows = MatchChecks.getMirrorTableRowCount(config.alltypes, client);
        long importRows = MatchChecks.getImportTableRowCount(config.alltypes, client);
        long importRowCount = 0;
        if (!config.streamtest) importRowCount = MatchChecks.getImportRowCount(client);

        // in case of a pause/resume tweak, let it drain longer
        int trial = 3;
        while (!RUNNING_STATE.equalsIgnoreCase(MatchChecks.getClusterState(client)) ||
                ((--trial > 0) &&
                        ((importStatValues[OUTSTANDING_REQUESTS] > 0) || (importRows < config.expected_rows)))) {
            Thread.sleep(PAUSE_WAIT * 1000);
            importStatValues = MatchChecks.getImportValues(client);
            if (!config.streamtest) mirrorRows = MatchChecks.getMirrorTableRowCount(config.alltypes, client);
            importRows = MatchChecks.getImportTableRowCount(config.alltypes, client);
            // importRowCount = MatchChecks.getImportRowCount(client);
        }

        // some counts that might help debugging....
log.info("importer outstanding requests: " + importStatValues[OUTSTANDING_REQUESTS]); log.info("importRows: " + importRows); if (!config.streamtest) { log.info("mirrorRows: " + mirrorRows); log.info("importRowCount: " + importRowCount); } if (config.useexport) { log.info("exportRowCount: " + exportRowCount); } if (config.useexport) { log.info("Total rows exported: " + finalInsertCount); log.info("Unmatched Rows remaining in the export Mirror Table: " + mirrorRows); log.info("Unmatched Rows received from Kafka to Import Table (duplicate rows): " + importRows); if (mirrorRows != 0) { log.error(mirrorRows + " Rows are missing from the import stream, failing test"); testResult = false; } } if ((exportRowCount != (importStatValues[SUCCESSES] + importStatValues[FAILURES])) && config.useexport) { log.error("Export count '" + exportRowCount + "' does not match import stats count '" + (importStatValues[SUCCESSES] + importStatValues[FAILURES]) + "' test fails."); testResult = false; } if (!config.useexport && !config.streamtest) { testResult = MatchChecks.checkPounderResults(config.expected_rows, client); } endTest(testResult, config); } }