/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.integration.tests; import com.google.common.base.Preconditions; import com.linkedin.pinot.common.data.Schema; import com.linkedin.pinot.common.utils.FileUploadUtils; import com.linkedin.pinot.common.utils.ZkStarter; import com.linkedin.pinot.controller.helix.ControllerTestUtils; import com.linkedin.pinot.core.indexsegment.generator.SegmentVersion; import com.linkedin.pinot.tools.query.comparison.QueryComparison; import com.linkedin.pinot.tools.query.comparison.SegmentInfoProvider; import com.linkedin.pinot.tools.query.comparison.StarTreeQueryGenerator; import com.linkedin.pinot.util.TestUtils; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; import java.net.URL; import java.sql.Timestamp; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.helix.manager.zk.ZKHelixAdmin; import org.apache.helix.model.ExternalView; import org.apache.helix.model.IdealState; import org.apache.helix.tools.ClusterStateVerifier; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; /** * Integration test for Star Tree based indexes: - Sets up the Pinot cluster and creates two tables, * one with default indexes, and another with star tree indexes. - Sends queries to both the tables * and asserts that results match. - Query to reference table is sent with TOP 10000, and the * comparator ensures that response from star tree is contained within the reference response. This * is to avoid false failures when groups with same value are truncated due to LIMIT or TOP N. */ public class StarTreeClusterIntegrationTest extends ClusterTest { private static final Logger LOGGER = LoggerFactory.getLogger(StarTreeClusterIntegrationTest.class); private static final int NUM_GENERATED_QUERIES = 100; private static final int TOTAL_EXPECTED_DOCS = 115545; private static final String DEFAULT_TABLE_NAME = "myTable"; private static final String STAR_TREE_TABLE_NAME = "myStarTable"; private static final String TIME_COLUMN_NAME = "DaysSinceEpoch"; private static final String TIME_UNIT = "daysSinceEpoch"; private static final String RETENTION_TIME_UNIT = ""; private static final int RETENTION_TIME = -1; private static final int SEGMENT_COUNT = 12; private static final long TIMEOUT_IN_MILLISECONDS = 30 * 1000; private static final long TIMEOUT_IN_SECONDS = 3600; private static final File _tmpDir = new File("/tmp/StarTreeClusterIntegrationTest"); private static final File _segmentsDir = new File("/tmp/StarTreeClusterIntegrationTest/segmentDir"); private static final File _tarredSegmentsDir = new File("/tmp/StarTreeClusterIntegrationTest/tarDir"); private StarTreeQueryGenerator _queryGenerator; private File _queryFile; /** * Start the Pinot Cluster: - Zookeeper - One Controller - One Broker - Two Servers * @throws Exception */ private void startCluster() throws Exception { startZk(); startController(); startBroker(); startServers(2); } /** * Add the reference and star tree tables to the cluster. * @throws Exception */ private void addOfflineTables() throws Exception { addOfflineTable(TIME_COLUMN_NAME, TIME_UNIT, RETENTION_TIME, RETENTION_TIME_UNIT, null, null, DEFAULT_TABLE_NAME, SegmentVersion.v1); addOfflineTable(TIME_COLUMN_NAME, TIME_UNIT, RETENTION_TIME, RETENTION_TIME_UNIT, null, null, STAR_TREE_TABLE_NAME, SegmentVersion.v1); } /** * Get schema with all single-value columns. * * @return Schema with all single-value columns. * @throws IOException */ private Schema getSingleValueColumnsSchema() throws IOException { URL resourceUrl = OfflineClusterIntegrationTest.class.getClassLoader() .getResource("On_Time_On_Time_Performance_2014_100k_subset_nonulls_single_value_columns.schema"); Preconditions.checkNotNull(resourceUrl); File schemaFile = new File(resourceUrl.getFile()); return Schema.fromFile(schemaFile); } /** * Generate the reference and star tree indexes and upload to corresponding tables. * @param avroFiles * @param tableName * @param starTree * @throws IOException * @throws ArchiveException * @throws InterruptedException */ private void generateAndUploadSegments(List<File> avroFiles, String tableName, boolean starTree) throws IOException, ArchiveException, InterruptedException { BaseClusterIntegrationTest.ensureDirectoryExistsAndIsEmpty(_segmentsDir); BaseClusterIntegrationTest.ensureDirectoryExistsAndIsEmpty(_tarredSegmentsDir); ExecutorService executor = Executors.newCachedThreadPool(); BaseClusterIntegrationTest.buildSegmentsFromAvro(avroFiles, executor, 0, _segmentsDir, _tarredSegmentsDir, tableName, starTree, getSingleValueColumnsSchema()); executor.shutdown(); executor.awaitTermination(TIMEOUT_IN_SECONDS, TimeUnit.SECONDS); for (String segmentName : _tarredSegmentsDir.list()) { LOGGER.info("Uploading segment {}", segmentName); File file = new File(_tarredSegmentsDir, segmentName); FileUploadUtils.sendSegmentFile(ControllerTestUtils.DEFAULT_CONTROLLER_HOST, ControllerTestUtils.DEFAULT_CONTROLLER_API_PORT, segmentName, file, file.length()); } } /** * Waits for total docs to match the expected value in the given table. There may be delay between * @param expectedRecordCount * @param deadline * @throws Exception */ private void waitForTotalDocsToMatch(String tableName, int expectedRecordCount, long deadline) throws Exception { int actualRecordCount; do { String query = "select count(*) from " + tableName; JSONObject response = postQuery(query); actualRecordCount = response.getInt("totalDocs"); String msg = "Actual record count: " + actualRecordCount + "\tExpected count: " + expectedRecordCount; LOGGER.info(msg); Assert.assertTrue(System.currentTimeMillis() < deadline, "Failed to read all records within the deadline. " + msg); Thread.sleep(2000L); } while (expectedRecordCount != actualRecordCount); } /** * Wait for External View to be in sync with Ideal State. * @return */ private boolean waitForExternalViewUpdate() { final ZKHelixAdmin helixAdmin = new ZKHelixAdmin(ZkStarter.DEFAULT_ZK_STR); ClusterStateVerifier.Verifier customVerifier = new ClusterStateVerifier.Verifier() { @Override public boolean verify() { String clusterName = getHelixClusterName(); List<String> resourcesInCluster = helixAdmin.getResourcesInCluster(clusterName); LOGGER.info("Waiting for external view to update for resources: {} startTime: {}", resourcesInCluster, new Timestamp(System.currentTimeMillis())); for (String resourceName : resourcesInCluster) { IdealState idealState = helixAdmin.getResourceIdealState(clusterName, resourceName); ExternalView externalView = helixAdmin.getResourceExternalView(clusterName, resourceName); LOGGER.info("HERE for {},\n IS:{} \n EV:{}", resourceName, idealState, externalView); if (idealState == null || externalView == null) { return false; } Set<String> partitionSet = idealState.getPartitionSet(); for (String partition : partitionSet) { Map<String, String> instanceStateMapIS = idealState.getInstanceStateMap(partition); Map<String, String> instanceStateMapEV = externalView.getStateMap(partition); if (instanceStateMapIS == null || instanceStateMapEV == null) { return false; } if (!instanceStateMapIS.equals(instanceStateMapEV)) { return false; } } LOGGER.info("External View updated successfully for {},\n IS:{} \n EV:{}", resourceName, idealState, externalView); } LOGGER.info("External View updated successfully for {}", resourcesInCluster); return true; } }; return ClusterStateVerifier.verifyByPolling(customVerifier, TIMEOUT_IN_MILLISECONDS); } /** * Replace the star tree table name with reference table name, and add TOP 10000. The TOP 10000 is * added to make the reference result a super-set of star tree result. This will ensure any groups * with equal values that are truncated still appear in the reference result. * @param starQuery */ private String convertToRefQuery(String starQuery) { String refQuery = StringUtils.replace(starQuery, STAR_TREE_TABLE_NAME, DEFAULT_TABLE_NAME); return (refQuery + " TOP 10000"); } @BeforeClass public void setUp() throws Exception { startCluster(); addOfflineTables(); BaseClusterIntegrationTest.ensureDirectoryExistsAndIsEmpty(_tmpDir); List<File> avroFiles = BaseClusterIntegrationTest.unpackAvroData(_tmpDir, SEGMENT_COUNT); _queryFile = new File(TestUtils.getFileFromResourceUrl(BaseClusterIntegrationTest.class .getClassLoader().getResource("OnTimeStarTreeQueries.txt"))); generateAndUploadSegments(avroFiles, DEFAULT_TABLE_NAME, false); generateAndUploadSegments(avroFiles, STAR_TREE_TABLE_NAME, true); Thread.sleep(15000); // Ensure that External View is in sync with Ideal State. if (!waitForExternalViewUpdate()) { Assert.fail("Cluster did not reach stable state"); } // Wait until all docs are available, this is required because the broker routing tables may not // be updated yet. waitForTotalDocsToMatch(DEFAULT_TABLE_NAME, TOTAL_EXPECTED_DOCS, System.currentTimeMillis() + 1500000L); waitForTotalDocsToMatch(STAR_TREE_TABLE_NAME, TOTAL_EXPECTED_DOCS, System.currentTimeMillis() + 1500000L); // Initialize the query generator SegmentInfoProvider dictionaryReader = new SegmentInfoProvider(_tarredSegmentsDir.getAbsolutePath()); List<String> metricColumns = dictionaryReader.getMetricColumns(); List<String> singleValueDimensionColumns = dictionaryReader.getSingleValueDimensionColumns(); Map<String, List<Object>> singleValueDimensionValuesMap = dictionaryReader.getSingleValueDimensionValuesMap(); _queryGenerator = new StarTreeQueryGenerator(STAR_TREE_TABLE_NAME, singleValueDimensionColumns, metricColumns, singleValueDimensionValuesMap); } /** * Given a query string for star tree: - Get the result from star tree cluster - Convert the query * to reference query (change table name, add TOP 10000) - Get the result from reference cluster - * Compare the results and assert that result of star tree is contained in reference result. NOTE: * This method of testing is limited in that it cannot detect cases where a valid entry is missing * from star tree result (to be addressed in future). * @param starQuery * @param expectNonZeroDocsScanned */ public void testOneQuery(String starQuery, boolean expectNonZeroDocsScanned) { try { JSONObject starResponse = postQuery(starQuery); if (expectNonZeroDocsScanned) { int numDocsScanned = starResponse.getInt("numDocsScanned"); String message = "Zero Docs Scanned for query: " + starQuery; Assert.assertTrue((numDocsScanned > 0), message); } String refQuery = convertToRefQuery(starQuery); JSONObject refResponse = postQuery(refQuery); // Skip comparison if not all results returned for reference response. if (refResponse.getInt("numDocsScanned") > 0) { JSONObject aggregationResults = refResponse.getJSONArray("aggregationResults").getJSONObject(0); if (aggregationResults.has("groupByResult") && aggregationResults.getJSONArray("groupByResult").length() == 10000) { return; } } boolean result = QueryComparison.compare(starResponse, refResponse, false); String message = "Result mis-match for Query: " + starQuery + "\nStar: " + starResponse.toString() + "\nRef: " + refResponse.toString(); Assert.assertTrue(result, message); } catch (Exception e) { LOGGER.error("Exception caught when executing query {}", starQuery, e); } } @AfterClass public void tearDown() throws Exception { stopBroker(); stopController(); stopServer(); stopZk(); FileUtils.deleteDirectory(_tmpDir); } @Test public void testGeneratedQueries() { for (int i = 0; i < NUM_GENERATED_QUERIES; i++) { String starQuery = _queryGenerator.nextQuery(); testOneQuery(starQuery, false); } } @Test public void testHardCodedQueries() { BufferedReader queryReader = null; try { queryReader = new BufferedReader(new FileReader(_queryFile)); String starQuery; while ((starQuery = queryReader.readLine()) != null) { testOneQuery(starQuery, true); } } catch (IOException e) { throw new RuntimeException(e.getMessage()); } finally { IOUtils.closeQuietly(queryReader); } } /** * Test that when metrics have predicates on them, we still get * correct results, ie correctly fall back on non-StarTree based execution. */ @Test public void testPredicateOnMetrics() { String query; // Query containing predicate on one metric only query = "SELECT SUM(DepDelayMinutes) FROM myStarTable WHERE DepDelay > 0\n"; testOneQuery(query, false); // Query containing predicate on multiple metrics query = "SELECT SUM(DepDelayMinutes) FROM myStarTable WHERE DepDelay > 0 AND ArrDelay > 0\n"; testOneQuery(query, false); // Query containing predicate on multiple metrics and dimensions query = "SELECT SUM(DepDelayMinutes) FROM myStarTable WHERE DepDelay > 0 AND ArrDelay > 0 AND OriginStateName = 'Massachusetts'\n"; testOneQuery(query, false); } /** * Tests queries with non-equality predicates */ @Test public void testNonEqualityPredicates() { String query; // 'Range' query query = "SELECT SUM(DepDelayMinutes) FROM myStarTable WHERE DepDelay between 0 and 10000\n"; testOneQuery(query, false); // 'IN' query query = "SELECT SUM(DepDelayMinutes) FROM myStarTable WHERE Origin IN ('JFK', 'LAX', 'DCW')\n"; testOneQuery(query, false); // 'NOT IN' Query query = "SELECT SUM(DepDelayMinutes) FROM myStarTable WHERE Origin NOT IN ('JFK', 'LAX', 'DCW')\n"; testOneQuery(query, false); // 'NOT EQ' Query query = "SELECT SUM(DepDelayMinutes) FROM myStarTable WHERE Origin <> 'JFK'\n"; testOneQuery(query, false); } }