/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.integration.tests;

import com.linkedin.pinot.common.utils.CommonConstants;
import com.linkedin.pinot.common.utils.FileUploadUtils;
import com.linkedin.pinot.core.indexsegment.generator.SegmentVersion;
import java.io.File;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.io.FileUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;


/**
 * Integration test for the auto-generated default columns.
 * <p>The test converts Avro data into 12 offline segments and runs queries against them.
 * <p>Extra columns are added to the schema to verify that new columns with default values are filled in for the
 * existing offline segments.
 * <p>The new columns are (name, field type, data type, single/multi-value, default null value):
 * <ul>
 *   <li>"NewAddedIntMetric", METRIC, INT, single-value, default 1</li>
 *   <li>"NewAddedLongMetric", METRIC, LONG, single-value, default 1</li>
 *   <li>"NewAddedFloatMetric", METRIC, FLOAT, single-value, default 0.0</li>
 *   <li>"NewAddedDoubleMetric", METRIC, DOUBLE, single-value, default 0.0</li>
 *   <li>"NewAddedIntDimension", DIMENSION, INT, single-value, default Integer.MIN_VALUE</li>
 *   <li>"NewAddedLongDimension", DIMENSION, LONG, single-value, default Long.MIN_VALUE</li>
 *   <li>"NewAddedFloatDimension", DIMENSION, FLOAT, single-value, default Float.NEGATIVE_INFINITY</li>
 *   <li>"NewAddedDoubleDimension", DIMENSION, DOUBLE, single-value, default Double.NEGATIVE_INFINITY</li>
 *   <li>"NewAddedStringDimension", DIMENSION, STRING, multi-value, default "newAdded"</li>
 * </ul>
 */
public class DefaultColumnsClusterIntegrationTest extends BaseClusterIntegrationTestWithQueryGenerator {
  protected static final String SCHEMA_WITH_EXTRA_COLUMNS =
      "On_Time_On_Time_Performance_2014_100k_subset_nonulls_default_column_test_extra_columns.schema";
  private static final File TMP_DIR = new File("/tmp/DefaultColumnsClusterIntegrationTest");
  private static final File SEGMENT_DIR = new File(TMP_DIR, "segmentDir");
  private static final File TAR_DIR = new File(TMP_DIR, "tarDir");
  private static final int SEGMENT_COUNT = 12;

  @BeforeClass
  public void setUp() throws Exception {
    setUp(true);
  }

  protected void setUp(boolean sendSchema) throws Exception {
    // Set up directories.
    FileUtils.deleteQuietly(TMP_DIR);
    Assert.assertTrue(TMP_DIR.mkdirs());
    Assert.assertTrue(SEGMENT_DIR.mkdir());
    Assert.assertTrue(TAR_DIR.mkdir());

    // Start the cluster.
    startZk();
    startController();
    startBroker();
    startServer();

    // Create the table.
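    // Parameter meanings below are assumed from how the base-class helper is invoked here: time column name, time
    // unit, retention value (-1, i.e. no retention), retention unit, broker and server tenants (null for defaults),
    // table name, and segment format version.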
    addOfflineTable("DaysSinceEpoch", "daysSinceEpoch", -1, "", null, null, "mytable", SegmentVersion.v1);

    // Add the schema.
    if (sendSchema) {
      sendSchema(SCHEMA_WITH_EXTRA_COLUMNS);
    }

    // Unpack the Avro files.
    List<File> avroFiles = unpackAvroData(TMP_DIR, SEGMENT_COUNT);

    // Load data into H2.
    ExecutorService executor = Executors.newCachedThreadPool();
    setupH2AndInsertAvro(avroFiles, executor);

    // Create segments from the Avro data.
    buildSegmentsFromAvro(avroFiles, executor, 0, SEGMENT_DIR, TAR_DIR, "mytable", false, null);

    // Initialize the query generator.
    setupQueryGenerator(avroFiles, executor);
    executor.shutdown();
    executor.awaitTermination(10, TimeUnit.MINUTES);

    // Set up a Helix spectator that counts the uploaded segments and unlocks the latch once all 12 segments are
    // online.
    CountDownLatch latch = setupSegmentCountCountDownLatch("mytable", SEGMENT_COUNT);

    // Upload the segments.
    for (String segmentName : TAR_DIR.list()) {
      File file = new File(TAR_DIR, segmentName);
      FileUploadUtils.sendSegmentFile("localhost", "8998", segmentName, file, file.length());
    }

    // Wait for all segments to be ONLINE.
    latch.await();
    waitForSegmentsOnline();
  }

  @Override
  protected void overrideOfflineServerConf(Configuration configuration) {
    configuration.addProperty(CommonConstants.Server.CONFIG_OF_ENABLE_DEFAULT_COLUMNS, true);
  }

  protected void sendSchema(String resourceName) throws Exception {
    URL resource = DefaultColumnsClusterIntegrationTest.class.getClassLoader().getResource(resourceName);
    Assert.assertNotNull(resource);
    File schemaFile = new File(resource.getFile());
    addSchema(schemaFile, "mytable_OFFLINE");
  }

  protected void waitForSegmentsOnline() throws Exception {
    long deadlineMillis = System.currentTimeMillis() + 2 * 60 * 1000L;
    while (getCurrentServingNumDocs("mytable") < TOTAL_DOCS) {
      if (System.currentTimeMillis() < deadlineMillis) {
        Thread.sleep(1000);
      } else {
        Assert.fail("Segments were not completely loaded within two minutes");
      }
    }
  }

  @Override
  public void testGeneratedQueries(boolean withMultiValues) throws Exception {
    _queryGenerator.setSkipMultiValuePredicates(!withMultiValues);
    int generatedQueryCount = getGeneratedQueryCount();
    for (int i = 0; i < generatedQueryCount; i++) {
      QueryGenerator.Query query = _queryGenerator.generateQuery();
      String pqlQuery = query.generatePql();
      // Skip "SELECT *" queries because the newly added columns do not exist in H2, so the results will not match.
      while (pqlQuery.startsWith("SELECT *")) {
        query = _queryGenerator.generateQuery();
        pqlQuery = query.generatePql();
      }
      runQuery(pqlQuery, query.generateH2Sql());
    }
  }

  @Override
  protected String getTableName() {
    return "mytable";
  }

  @Test
  public void testNewAddedColumns() throws Exception {
    String pqlQuery;
    String sqlQuery;

    // Test queries with each newly added column.
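    // Every newly added column holds its default value in every row, so a predicate that matches the default (e.g.
    // NewAddedIntMetric = 1) selects all rows, and the filtered Pinot COUNT(*) must equal the unfiltered H2 COUNT(*).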
    pqlQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedIntMetric = 1";
    sqlQuery = "SELECT COUNT(*) FROM mytable";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedLongMetric = 1";
    sqlQuery = "SELECT COUNT(*) FROM mytable";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedFloatMetric = 0.0";
    sqlQuery = "SELECT COUNT(*) FROM mytable";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedDoubleMetric = 0.0";
    sqlQuery = "SELECT COUNT(*) FROM mytable";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedIntDimension < 0";
    sqlQuery = "SELECT COUNT(*) FROM mytable";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedLongDimension < 0";
    sqlQuery = "SELECT COUNT(*) FROM mytable";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedFloatDimension < 0.0";
    sqlQuery = "SELECT COUNT(*) FROM mytable";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedDoubleDimension < 0.0";
    sqlQuery = "SELECT COUNT(*) FROM mytable";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedStringDimension = 'newAdded'";
    sqlQuery = "SELECT COUNT(*) FROM mytable";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));

    // Test queries with a newly added metric column inside the aggregation function. Because the default value is 1,
    // SUM over the new metric equals the row count of the filtered result.
    pqlQuery = "SELECT SUM(NewAddedIntMetric) FROM mytable WHERE DaysSinceEpoch <= 16312";
    sqlQuery = "SELECT COUNT(*) FROM mytable WHERE DaysSinceEpoch <= 16312";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT SUM(NewAddedIntMetric) FROM mytable WHERE DaysSinceEpoch > 16312";
    sqlQuery = "SELECT COUNT(*) FROM mytable WHERE DaysSinceEpoch > 16312";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT SUM(NewAddedLongMetric) FROM mytable WHERE DaysSinceEpoch <= 16312";
    sqlQuery = "SELECT COUNT(*) FROM mytable WHERE DaysSinceEpoch <= 16312";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));
    pqlQuery = "SELECT SUM(NewAddedLongMetric) FROM mytable WHERE DaysSinceEpoch > 16312";
    sqlQuery = "SELECT COUNT(*) FROM mytable WHERE DaysSinceEpoch > 16312";
    runQuery(pqlQuery, Collections.singletonList(sqlQuery));

    // Test other query forms with the newly added columns.
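    // The broker returns one entry in "aggregationResults" per aggregation function; each entry carries a
    // "groupByResult" array of {"value": ..., "group": [...]} objects, which the assertions below unpack.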
    JSONObject response;
    JSONObject groupByResult;

    pqlQuery = "SELECT SUM(NewAddedFloatMetric) FROM mytable GROUP BY NewAddedStringDimension";
    response = postQuery(pqlQuery);
    groupByResult =
        response.getJSONArray("aggregationResults").getJSONObject(0).getJSONArray("groupByResult").getJSONObject(0);
    Assert.assertEquals(groupByResult.getInt("value"), 0);
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(0), "newAdded");

    pqlQuery = "SELECT SUM(NewAddedDoubleMetric) FROM mytable GROUP BY NewAddedIntDimension";
    response = postQuery(pqlQuery);
    groupByResult =
        response.getJSONArray("aggregationResults").getJSONObject(0).getJSONArray("groupByResult").getJSONObject(0);
    Assert.assertEquals(groupByResult.getInt("value"), 0);
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(0), String.valueOf(Integer.MIN_VALUE));

    pqlQuery = "SELECT SUM(NewAddedIntMetric) FROM mytable GROUP BY NewAddedLongDimension";
    response = postQuery(pqlQuery);
    groupByResult =
        response.getJSONArray("aggregationResults").getJSONObject(0).getJSONArray("groupByResult").getJSONObject(0);
    Assert.assertEquals(groupByResult.getInt("value"), TOTAL_DOCS);
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(0), String.valueOf(Long.MIN_VALUE));

    pqlQuery = "SELECT SUM(NewAddedIntMetric), SUM(NewAddedLongMetric), SUM(NewAddedFloatMetric), "
        + "SUM(NewAddedDoubleMetric) FROM mytable GROUP BY NewAddedIntDimension, NewAddedLongDimension, "
        + "NewAddedFloatDimension, NewAddedDoubleDimension, NewAddedStringDimension";
    response = postQuery(pqlQuery);
    JSONArray aggregationResults = response.getJSONArray("aggregationResults");
    groupByResult = aggregationResults.getJSONObject(0).getJSONArray("groupByResult").getJSONObject(0);
    Assert.assertEquals(groupByResult.getInt("value"), TOTAL_DOCS);
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(0), String.valueOf(Integer.MIN_VALUE));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(1), String.valueOf(Long.MIN_VALUE));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(2), String.valueOf(Float.NEGATIVE_INFINITY));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(3), String.valueOf(Double.NEGATIVE_INFINITY));
    groupByResult = aggregationResults.getJSONObject(1).getJSONArray("groupByResult").getJSONObject(0);
    Assert.assertEquals(groupByResult.getInt("value"), TOTAL_DOCS);
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(0), String.valueOf(Integer.MIN_VALUE));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(1), String.valueOf(Long.MIN_VALUE));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(2), String.valueOf(Float.NEGATIVE_INFINITY));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(3), String.valueOf(Double.NEGATIVE_INFINITY));
    groupByResult = aggregationResults.getJSONObject(2).getJSONArray("groupByResult").getJSONObject(0);
    Assert.assertEquals(groupByResult.getInt("value"), 0);
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(0), String.valueOf(Integer.MIN_VALUE));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(1), String.valueOf(Long.MIN_VALUE));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(2), String.valueOf(Float.NEGATIVE_INFINITY));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(3), String.valueOf(Double.NEGATIVE_INFINITY));
    groupByResult = aggregationResults.getJSONObject(3).getJSONArray("groupByResult").getJSONObject(0);
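    // The fourth aggregation, SUM(NewAddedDoubleMetric), sums 0.0 defaults, so its value is also 0 with the same
    // default group keys.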
    Assert.assertEquals(groupByResult.getInt("value"), 0);
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(0), String.valueOf(Integer.MIN_VALUE));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(1), String.valueOf(Long.MIN_VALUE));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(2), String.valueOf(Float.NEGATIVE_INFINITY));
    Assert.assertEquals(groupByResult.getJSONArray("group").getString(3), String.valueOf(Double.NEGATIVE_INFINITY));

    // Make sure a "SELECT *" query that includes the newly added columns can be executed (no H2 comparison).
    pqlQuery = "SELECT * FROM mytable";
    runQuery(pqlQuery, null);
  }

  @AfterClass
  public void tearDown() throws Exception {
    dropOfflineTable("mytable");
    stopServer();
    stopBroker();
    stopController();
    try {
      stopZk();
    } catch (Exception e) {
      // Swallow ZooKeeper exceptions.
    }
    FileUtils.deleteQuietly(TMP_DIR);
  }
}