/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.integration.tests;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import org.testng.Assert;
import org.testng.TestListenerAdapter;
import org.testng.TestNG;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.linkedin.pinot.common.data.Schema;


/**
 * A command-line runner that invokes HybridClusterScanComparisonIntegrationTest.
 *
 * The expected arguments are:
 *   tableName: The name of the table in the queries. It is substituted with an internal table name.
 *   schemaFilePath: Full path of the file that has the Pinot schema.
 *   segQueryDirPath: Full path of a directory in which we expect to see the following (other content is ignored):
 *     - A directory called avro-files under which all the avro files must reside, with their names indicating
 *       the date ranges they cover.
 *     - A file called queries.txt that has all the queries to execute against this dataset.
 *     - Optionally, a file called scan-responses.txt with pre-recorded responses of the scan-based query runner.
 *   invIndexCols: A comma-separated list of column names to build inverted indexes on.
 *   sortedCol: The name of the sorted column to be used for building realtime segments.
 *
 * The time column name and type (e.g. "daysSinceEpoch" and "DAYS") are read from the schema file rather than
 * passed as arguments.
 *
 * The command can be invoked as follows:
 *   CLASSPATH_PREFIX=pinot-integration-tests/target/pinot-integration-tests-*-tests.jar \
 *       pinot-integration-tests/target/pinot-integration-tests-pkg/bin/pinot-hybrid-cluster-test.sh args...
 */
public class HybridScanBasedCommandLineTestRunner {

  public static void usage() {
    System.err.println(
        "Usage: pinot-hybrid-cluster.sh [--llc] [--record] tableName schemaFilePath segQueryDirPath invIndexCols sortedCol");
    System.exit(1);
  }

  public static void main(String[] args) throws Exception {
    if (args.length == 0) {
      usage();
    }
    int expectedArgsLen = 5;
    int ix = 0;

    // Parse optional arguments first.
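    // Optional flags must precede the positional arguments; each flag consumed below bumps the
    // expected argument count. Example invocation (hypothetical table name and paths):
    //   pinot-hybrid-cluster-test.sh --llc myTable /tmp/schema.json /tmp/segQueryDir memberId,country memberId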
    while (args[ix].startsWith("-")) {
      if (args[ix].equals("--record")) {
        CustomHybridClusterScanComparisonIntegrationTest._recordScanResponses = true;
      } else if (args[ix].equals("--llc")) {
        CustomHybridClusterScanComparisonIntegrationTest._useLlc = true;
      } else {
        usage();
      }
      ix++;
      expectedArgsLen++;
    }
    if (args.length != expectedArgsLen) {
      usage();
    }
    final String tableName = args[ix++];
    final String schemaFilePath = args[ix++];
    // We expect a directory called 'avro-files' and files called 'queries.txt' and 'scan-responses.txt' in here.
    final String segQueryDirPath = args[ix++];
    final String invIndexCols = args[ix++];
    final String sortedCol = args[ix++];
    CustomHybridClusterScanComparisonIntegrationTest.setParams(tableName, schemaFilePath, segQueryDirPath,
        invIndexCols, sortedCol);

    TestListenerAdapter tla = new TestListenerAdapter();
    TestNG testng = new TestNG();
    testng.setTestClasses(new Class[]{CustomHybridClusterScanComparisonIntegrationTest.class});
    testng.addListener(tla);
    testng.run();

    System.out.println("Passed tests: " + tla.getPassedTests());
    if (!tla.getSkippedTests().isEmpty()) {
      System.out.println("Skipped tests: " + tla.getSkippedTests());
    }
    System.out.println(tla.toString());
    if (!tla.getFailedTests().isEmpty()) {
      System.err.println("Failed tests: " + tla.getFailedTests());
      System.exit(1);
    }
    System.exit(0);
  }

  public static class CustomHybridClusterScanComparisonIntegrationTest
      extends HybridClusterScanComparisonIntegrationTest {
    private static final String LOG_FILE_SUFFIX = "query-comparison.log";
    private static final String QUERY_FILE_NAME = "queries.txt";
    private static final String AVRO_DIR = "avro-files";
    // The scan-responses file will have more lines than the queries file.
    private static final String SCAN_RSP_FILE_NAME = "scan-responses.txt";

    private static String _timeColName;
    private static String _timeColType;
    private static List<File> _avroFiles = new ArrayList<>(4);
    private static List<String> _invIndexCols = new ArrayList<>(4);
    private static String _tableName;
    private static String _queryFilePath;
    private static String _segsQueryDir;
    private static File _schemaFile;
    private static String _sortedColumn;
    private static String _logFileName;
    private static boolean _inCmdLine = false;
    private static boolean _recordScanResponses = false; // Must be run in single-threaded mode if true
    private static boolean _useLlc = false; // Whether to use the Kafka low-level consumer
    private static boolean _compareWithRspFile = true;
    private static String _scanRspFilePath;

    private boolean _multiThreaded = true;
    private FileWriter _scanRspFileWriter;
    private LineNumberReader _scanRspFileReader;

    public static void setParams(String tableName, String schemaFileName, String segsQueryDir, String invIndexCols,
        String sortedCol) throws Exception {
      // TODO add some basic checks
      // TODO add params for single query
      _tableName = tableName;
      _queryFilePath = segsQueryDir + "/" + QUERY_FILE_NAME;
      _scanRspFilePath = segsQueryDir + "/" + SCAN_RSP_FILE_NAME;
      _sortedColumn = sortedCol;
      _segsQueryDir = segsQueryDir;

      File avroDir = new File(segsQueryDir + "/" + AVRO_DIR);
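      // The name check below only enforces a leading digit. For example (hypothetical names), both
      // "14000.avro" and "14000-14006.avro" would pass; the files are then sorted by name, which puts
      // them in time order as long as the names are of equal length (e.g. zero-padded day numbers).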
      File[] avroFiles = avroDir.listFiles();
      if (avroFiles == null) {
        throw new RuntimeException(avroDir.getAbsolutePath() + " is not a readable directory");
      }
      for (File file : avroFiles) {
        if (!file.getName().matches("[0-9].*")) {
          throw new RuntimeException(
              "Avro file names must start with a digit that indicates starting time/day of avro events in the file");
        }
        _avroFiles.add(file);
      }
      Collections.sort(_avroFiles);

      for (String colName : invIndexCols.split(",")) {
        _invIndexCols.add(colName);
      }

      _schemaFile = new File(schemaFileName);
      Schema schema = Schema.fromFile(_schemaFile);
      _timeColName = schema.getTimeColumnName();
      _timeColType = schema.getIncomingTimeUnit().toString();
      _logFileName = _tableName + "-" + System.currentTimeMillis() + "-" + LOG_FILE_SUFFIX;
      _inCmdLine = true;
    }

    @Override
    protected List<File> getAllAvroFiles() {
      return _avroFiles;
    }

    @Override
    public File getSchemaFile() {
      return _schemaFile;
    }

    @Override
    protected String getTimeColumnName() {
      return _timeColName;
    }

    @Override
    protected String getTimeColumnType() {
      return _timeColType;
    }

    @Override
    protected String getSortedColumn() {
      return _sortedColumn;
    }

    @Override
    protected int getKafkaPartitionCount() {
      if (_useLlc) {
        return 2;
      } else {
        return 10;
      }
    }

    @Override
    @BeforeClass
    public void setUp() throws Exception {
      if (!_inCmdLine) {
        return;
      }
      _nQueriesRead = 0;
      _createSegmentsInParallel = true;
      File scanRspFile = new File(_scanRspFilePath);
      if (_recordScanResponses) {
        // Record mode: write scan responses to a new file. This must run single-threaded so that
        // the recorded responses line up with the queries.
        _compareWithRspFile = false;
        if (scanRspFile.exists()) {
          throw new RuntimeException(_scanRspFilePath + " already exists");
        }
        _scanRspFileWriter = new FileWriter(scanRspFile);
        _multiThreaded = false;
      } else if (scanRspFile.exists()) {
        // Compare with the pre-recorded response file.
        _scanRspFileReader = new LineNumberReader(new FileReader(scanRspFile));
      } else {
        // No response file: run live queries with the scan-based query runner.
        _compareWithRspFile = false;
      }
      for (String col : _invIndexCols) {
        invertedIndexColumns.add(col);
      }
      super.setUp();
    }

    @Override
    @AfterClass
    public void tearDown() throws Exception {
      if (!_inCmdLine) {
        return;
      }
      if (_recordScanResponses) {
        _scanRspFileWriter.flush();
        _scanRspFileWriter.close();
      }
      super.tearDown();
    }

    @Override
    protected boolean shouldUseLlc() {
      return _useLlc;
    }

    @Override
    protected long getStabilizationTimeMs() {
      return 5 * 60 * 1000L; // 5 minutes
    }

    @Override
    protected int getAvroFileCount() {
      return _avroFiles.size();
    }

    @Override
    protected FileWriter getScanRspRecordFileWriter() {
      return _scanRspFileWriter;
    }

    @Override
    protected FileWriter getLogWriter() throws IOException {
      return new FileWriter(new File(_logFileName));
    }

    protected FileReader getQueryFile() throws FileNotFoundException {
      return new FileReader(_queryFilePath);
    }

    private String replaceTableName(String query) {
      return query.replace(_tableName, "mytable");
    }

    @Test
    public void testQueriesFromLog() throws Exception {
      try {
        if (!_inCmdLine) {
          return;
        }
        runTestLoop(new Callable<Object>() {
          @Override
          public Object call() throws Exception {
            LineNumberReader queryReader = new LineNumberReader(getQueryFile());
            while (true) {
              String line = queryReader.readLine();
              if (line == null) {
                break;
              }
              _nQueriesRead++;
              final String query = replaceTableName(line);
              String compareStr = null;
              if (_compareWithRspFile) {
                compareStr = _scanRspFileReader.readLine();
                if (compareStr == null) {
                  Assert.fail("Not enough lines in " + _scanRspFilePath);
                }
              }
              final String scanRsp = compareStr;
              if (_multiThreaded) {
                runQueryAsync(query, scanRsp);
              } else {
                runQuery(query, _scanBasedQueryProcessor, false, scanRsp);
              }
            }
            queryReader.close();
            return null;
          }
        }, _multiThreaded);
        System.out.println(getNumSuccesfulQueries() + " Passed, " + getNumFailedQueries() + " Failed, "
            + getNumEmptyResults() + " empty results");
        Assert.assertEquals(getNumFailedQueries(), 0, "There were query failures. See " + _logFileName);
      } catch (Exception e) {
        System.out.println("Caught exception while running queries from log");
        e.printStackTrace();
        throw e;
      }
    }
  }
}