/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.integration.tests;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import org.testng.Assert;
import org.testng.TestListenerAdapter;
import org.testng.TestNG;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.linkedin.pinot.common.data.Schema;


/**
 * A command-line runner that invokes HybridClusterScanComparisonIntegrationTest.
 *
 * The expected arguments are:
 *   tableName: The name of the table in the queries. It is substituted with an internal table name.
 *   schemaFilePath: Full path of the file that has the Pinot schema.
 *   segQueryDirPath: Full path of a directory in which we expect to see the following (other content is ignored):
 *     - A directory called avro-files under which all the avro files must reside, with their names indicating
 *       the date ranges they cover.
 *     - A file called queries.txt that has all the queries to execute against this dataset.
 *     - Optionally, a file called scan-responses.txt with pre-recorded responses of the scan-based query runner.
 *   invIndexCols: A comma-separated list of column names to build inverted indexes on.
 *   sortedCol: The name of the sorted column to be used for building realtime segments.
 *
 * The time column name and type (e.g. "daysSinceEpoch" and "DAYS") are read from the schema file rather than
 * passed as arguments.
 *
 * The command can be invoked as follows:
 *   CLASSPATH_PREFIX=pinot-integration-tests/target/pinot-integration-tests-*-tests.jar \
 *       pinot-integration-tests/target/pinot-integration-tests-pkg/bin/pinot-hybrid-cluster-test.sh args...
 */
public class HybridScanBasedCommandLineTestRunner {

  public static void usage() {
    System.err.println(
        "Usage: pinot-hybrid-cluster.sh [--llc] [--record] tableName schemaFilePath segQueryDirPath invIndexCols sortedCol");
    System.exit(1);
  }

  public static void main(String[] args) throws Exception {
    if (args.length == 0) {
      usage();
    }
    int expectedArgsLen = 5;
    int ix = 0;

    // Parse optional arguments first.
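    // Optional flags must precede the positional arguments; each flag consumed below bumps the
    // expected argument count. Example invocation (hypothetical table name and paths):
    //   pinot-hybrid-cluster-test.sh --llc myTable /tmp/schema.json /tmp/segQueryDir memberId,country memberId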
    while (args[ix].startsWith("-")) {
      if (args[ix].equals("--record")) {
        CustomHybridClusterScanComparisonIntegrationTest._recordScanResponses = true;
      } else if (args[ix].equals("--llc")) {
        CustomHybridClusterScanComparisonIntegrationTest._useLlc = true;
      } else {
        usage();
      }
      ix++;
      expectedArgsLen++;
    }
    if (args.length != expectedArgsLen) {
      usage();
    }
    final String tableName = args[ix++];
    final String schemaFilePath = args[ix++];
    // We expect a directory called 'avro-files' and files called 'queries.txt' and 'scan-responses.txt' in here.
    final String segQueryDirPath = args[ix++];
    final String invIndexCols = args[ix++];
    final String sortedCol = args[ix++];
    CustomHybridClusterScanComparisonIntegrationTest.setParams(tableName, schemaFilePath, segQueryDirPath,
        invIndexCols, sortedCol);

    TestListenerAdapter tla = new TestListenerAdapter();
    TestNG testng = new TestNG();
    testng.setTestClasses(new Class[]{CustomHybridClusterScanComparisonIntegrationTest.class});
    testng.addListener(tla);
    testng.run();

    System.out.println("Passed tests: " + tla.getPassedTests());
    if (!tla.getSkippedTests().isEmpty()) {
      System.out.println("Skipped tests: " + tla.getSkippedTests());
    }
    System.out.println(tla.toString());
    if (!tla.getFailedTests().isEmpty()) {
      System.err.println("Failed tests: " + tla.getFailedTests());
      System.exit(1);
    }
    System.exit(0);
  }

  public static class CustomHybridClusterScanComparisonIntegrationTest
      extends HybridClusterScanComparisonIntegrationTest {
    private static final String LOG_FILE_SUFFIX = "query-comparison.log";
    private static final String QUERY_FILE_NAME = "queries.txt";
    private static final String AVRO_DIR = "avro-files";
    // The scan-responses file will have more lines than the queries file.
    private static final String SCAN_RSP_FILE_NAME = "scan-responses.txt";

    private static String _timeColName;
    private static String _timeColType;
    private static List<File> _avroFiles = new ArrayList<>(4);
    private static List<String> _invIndexCols = new ArrayList<>(4);
    private static String _tableName;
    private static String _queryFilePath;
    private static String _segsQueryDir;
    private static File _schemaFile;
    private static String _sortedColumn;
    private static String _logFileName;
    private static boolean _inCmdLine = false;
    private static boolean _recordScanResponses = false; // Must be run in single-threaded mode if true
    private static boolean _useLlc = false; // Whether to use the Kafka low-level consumer
    private static boolean _compareWithRspFile = true;
    private static String _scanRspFilePath;

    private boolean _multiThreaded = true;
    private FileWriter _scanRspFileWriter;
    private LineNumberReader _scanRspFileReader;

    public static void setParams(String tableName, String schemaFileName, String segsQueryDir, String invIndexCols,
        String sortedCol) throws Exception {
      // TODO add some basic checks
      // TODO add params for single query
      _tableName = tableName;
      _queryFilePath = segsQueryDir + "/" + QUERY_FILE_NAME;
      _scanRspFilePath = segsQueryDir + "/" + SCAN_RSP_FILE_NAME;
      _sortedColumn = sortedCol;
      _segsQueryDir = segsQueryDir;

      File avroDir = new File(segsQueryDir + "/" + AVRO_DIR);
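      // The name check below only enforces a leading digit. For example (hypothetical names), both
      // "14000.avro" and "14000-14006.avro" would pass; the files are then sorted by name, which puts
      // them in time order as long as the names are of equal length (e.g. zero-padded day numbers).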
      File[] avroFiles = avroDir.listFiles();
      if (avroFiles == null) {
        throw new RuntimeException(avroDir.getAbsolutePath() + " is not a readable directory");
      }
      for (File file : avroFiles) {
        if (!file.getName().matches("[0-9].*")) {
          throw new RuntimeException(
              "Avro file names must start with a digit that indicates starting time/day of avro events in the file");
        }
        _avroFiles.add(file);
      }
      Collections.sort(_avroFiles);

      for (String colName : invIndexCols.split(",")) {
        _invIndexCols.add(colName);
      }

      _schemaFile = new File(schemaFileName);
      Schema schema = Schema.fromFile(_schemaFile);
      _timeColName = schema.getTimeColumnName();
      _timeColType = schema.getIncomingTimeUnit().toString();
      _logFileName = _tableName + "-" + System.currentTimeMillis() + "-" + LOG_FILE_SUFFIX;
      _inCmdLine = true;
    }

    @Override
    protected List<File> getAllAvroFiles() {
      return _avroFiles;
    }

    @Override
    public File getSchemaFile() {
      return _schemaFile;
    }

    @Override
    protected String getTimeColumnName() {
      return _timeColName;
    }

    @Override
    protected String getTimeColumnType() {
      return _timeColType;
    }

    @Override
    protected String getSortedColumn() {
      return _sortedColumn;
    }

    @Override
    protected int getKafkaPartitionCount() {
      if (_useLlc) {
        return 2;
      } else {
        return 10;
      }
    }

    @Override
    @BeforeClass
    public void setUp() throws Exception {
      if (!_inCmdLine) {
        return;
      }
      _nQueriesRead = 0;
      _createSegmentsInParallel = true;
      File scanRspFile = new File(_scanRspFilePath);
      if (_recordScanResponses) {
        // Record mode: write scan responses to a new file. This must run single-threaded so that
        // the recorded responses line up with the queries.
        _compareWithRspFile = false;
        if (scanRspFile.exists()) {
          throw new RuntimeException(_scanRspFilePath + " already exists");
        }
        _scanRspFileWriter = new FileWriter(scanRspFile);
        _multiThreaded = false;
      } else if (scanRspFile.exists()) {
        // Compare with the pre-recorded response file.
        _scanRspFileReader = new LineNumberReader(new FileReader(scanRspFile));
      } else {
        // No response file: run live queries with the scan-based query runner.
        _compareWithRspFile = false;
      }
      for (String col : _invIndexCols) {
        invertedIndexColumns.add(col);
      }
      super.setUp();
    }

    @Override
    @AfterClass
    public void tearDown() throws Exception {
      if (!_inCmdLine) {
        return;
      }
      if (_recordScanResponses) {
        _scanRspFileWriter.flush();
        _scanRspFileWriter.close();
      }
      super.tearDown();
    }

    @Override
    protected boolean shouldUseLlc() {
      return _useLlc;
    }

    @Override
    protected long getStabilizationTimeMs() {
      return 5 * 60 * 1000L; // 5 minutes
    }

    @Override
    protected int getAvroFileCount() {
      return _avroFiles.size();
    }

    @Override
    protected FileWriter getScanRspRecordFileWriter() {
      return _scanRspFileWriter;
    }

    @Override
    protected FileWriter getLogWriter() throws IOException {
      return new FileWriter(new File(_logFileName));
    }

    protected FileReader getQueryFile() throws FileNotFoundException {
      return new FileReader(_queryFilePath);
    }

    private String replaceTableName(String query) {
      return query.replace(_tableName, "mytable");
    }

    @Test
    public void testQueriesFromLog() throws Exception {
      try {
        if (!_inCmdLine) {
          return;
        }
        runTestLoop(new Callable<Object>() {
          @Override
          public Object call() throws Exception {
            LineNumberReader queryReader = new LineNumberReader(getQueryFile());
            while (true) {
              String line = queryReader.readLine();
              if (line == null) {
                break;
              }
              _nQueriesRead++;
              final String query = replaceTableName(line);
              String compareStr = null;
              if (_compareWithRspFile) {
                compareStr = _scanRspFileReader.readLine();
                if (compareStr == null) {
                  Assert.fail("Not enough lines in " + _scanRspFilePath);
                }
              }
              final String scanRsp = compareStr;
              if (_multiThreaded) {
                runQueryAsync(query, scanRsp);
              } else {
                runQuery(query, _scanBasedQueryProcessor, false, scanRsp);
              }
            }
            queryReader.close();
            return null;
          }
        }, _multiThreaded);
        System.out.println(getNumSuccesfulQueries() + " Passed, " + getNumFailedQueries() + " Failed, "
            + getNumEmptyResults() + " empty results");
        Assert.assertEquals(getNumFailedQueries(), 0, "There were query failures. See " + _logFileName);
      } catch (Exception e) {
        System.out.println("Caught exception while running queries from log");
        e.printStackTrace();
        throw e;
      }
    }
  }
}