/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.tools.query.comparison; import com.google.common.base.Preconditions; import com.linkedin.pinot.common.request.BrokerRequest; import com.linkedin.pinot.common.segment.ReadMode; import com.linkedin.pinot.common.segment.SegmentMetadata; import com.linkedin.pinot.common.utils.request.RequestUtils; import com.linkedin.pinot.core.indexsegment.IndexSegment; import com.linkedin.pinot.core.segment.index.loader.Loaders; import com.linkedin.pinot.pql.parsers.Pql2Compiler; import java.io.File; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; /** * Given a list of dimension and metric columns, and possible values for dimension columns: * Generate queries of the form: SELECT SUM(m1), SUM(m2)... WHERE d1='v1' AND d2='v2'... GROUP BY d3,d4... */ public class StarTreeQueryGenerator { private static final String SELECT = "SELECT "; private static final String FROM = " FROM "; private static final String WHERE = " WHERE "; private static final String GROUP_BY = " GROUP BY "; private static final String BETWEEN = " BETWEEN "; private static final String IN = " IN "; private static final String AND = " AND "; private static final int MAX_NUM_AGGREGATIONS = 3; private static final int MAX_NUM_PREDICATES = 3; private static final int MAX_NUM_GROUP_BYS = 2; private static final int MAX_NUM_IN_VALUES = 5; private static final int SHUFFLE_THRESHOLD = 5 * MAX_NUM_IN_VALUES; private static final Random RANDOM = new Random(); // Add more functions here to generate them in the queries. private static final List<String> AGGREGATION_FUNCTIONS = Collections.singletonList("SUM"); // Add more comparators here to generate them in the 'WHERE' clause. private static final List<String> COMPARATORS = Arrays.asList("=", "<>", "<", ">", "<=", ">="); private final String _tableName; private final List<String> _singleValueDimensionColumns; private final List<String> _metricColumns; private final Map<String, List<Object>> _singleValueDimensionValuesMap; public StarTreeQueryGenerator(String tableName, List<String> singleValueDimensionColumns, List<String> metricColumns, Map<String, List<Object>> singleValueDimensionValuesMap) { _tableName = tableName; _singleValueDimensionColumns = singleValueDimensionColumns; _metricColumns = metricColumns; _singleValueDimensionValuesMap = singleValueDimensionValuesMap; } /** * Generate one aggregation function for the given metric column. * * @param metricColumn metric column. * @return aggregation function. */ private StringBuilder generateAggregation(String metricColumn) { StringBuilder stringBuilder = new StringBuilder(AGGREGATION_FUNCTIONS.get(RANDOM.nextInt(AGGREGATION_FUNCTIONS.size()))); return stringBuilder.append('(').append(metricColumn).append(')'); } /** * Generate the aggregation section of the query, returns at least one aggregation. * * @return aggregation section. */ private StringBuilder generateAggregations() { StringBuilder stringBuilder = new StringBuilder(); int numAggregations = Math.min(RANDOM.nextInt(MAX_NUM_AGGREGATIONS) + 1, _metricColumns.size()); Collections.shuffle(_metricColumns); for (int i = 0; i < numAggregations; i++) { if (i != 0) { stringBuilder.append(',').append(' '); } stringBuilder.append(generateAggregation(_metricColumns.get(i))); } return stringBuilder; } /** * Generate a comparison predicate for the given dimension column. * * @param dimensionColumn dimension column. * @return comparison predicate. */ private StringBuilder generateComparisonPredicate(String dimensionColumn) { StringBuilder stringBuilder = new StringBuilder(dimensionColumn); stringBuilder.append(' ').append(COMPARATORS.get(RANDOM.nextInt(COMPARATORS.size()))).append(' '); List<Object> valueArray = _singleValueDimensionValuesMap.get(dimensionColumn); Object value = valueArray.get(RANDOM.nextInt(valueArray.size())); if (value instanceof String) { stringBuilder.append('\'').append(((String) value).replaceAll("'", "''")).append('\''); } else { stringBuilder.append(value); } return stringBuilder; } /** * Generate a between predicate for the given dimension column. * * @param dimensionColumn dimension column. * @return between predicate. */ private StringBuilder generateBetweenPredicate(String dimensionColumn) { StringBuilder stringBuilder = new StringBuilder(dimensionColumn).append(BETWEEN); List<Object> valueArray = _singleValueDimensionValuesMap.get(dimensionColumn); Object value1 = valueArray.get(RANDOM.nextInt(valueArray.size())); Object value2 = valueArray.get(RANDOM.nextInt(valueArray.size())); Preconditions.checkState((value1 instanceof String && value2 instanceof String) || (value1 instanceof Number && value2 instanceof Number)); if (value1 instanceof String) { if (((String) value1).compareTo((String) value2) < 0) { stringBuilder.append('\'').append(((String) value1).replaceAll("'", "''")).append('\''); stringBuilder.append(AND); stringBuilder.append('\'').append(((String) value2).replaceAll("'", "''")).append('\''); } else { stringBuilder.append('\'').append(((String) value2).replaceAll("'", "''")).append('\''); stringBuilder.append(AND); stringBuilder.append('\'').append(((String) value1).replaceAll("'", "''")).append('\''); } } else { if (((Number) value1).doubleValue() < ((Number) value2).doubleValue()) { stringBuilder.append(value1).append(AND).append(value2); } else { stringBuilder.append(value2).append(AND).append(value1); } } return stringBuilder; } /** * Generate a in predicate for the given dimension column. * * @param dimensionColumn dimension column. * @return in predicate. */ private StringBuilder generateInPredicate(String dimensionColumn) { StringBuilder stringBuilder = new StringBuilder(dimensionColumn).append(IN).append('('); List<Object> valueArray = _singleValueDimensionValuesMap.get(dimensionColumn); int size = valueArray.size(); int numValues = Math.min(RANDOM.nextInt(MAX_NUM_IN_VALUES) + 1, size); if (size < SHUFFLE_THRESHOLD) { // For smaller size values, use shuffle strategy. Collections.shuffle(valueArray); for (int i = 0; i < numValues; i++) { if (i != 0) { stringBuilder.append(',').append(' '); } Object value = valueArray.get(i); if (value instanceof String) { stringBuilder.append('\'').append(((String) value).replaceAll("'", "''")).append('\''); } else { stringBuilder.append(value); } } } else { // For larger size values, use random indices strategy. Set<Integer> indices = new HashSet<>(); while (indices.size() < numValues) { indices.add(RANDOM.nextInt(size)); } boolean isFirst = true; for (int index : indices) { if (isFirst) { isFirst = false; } else { stringBuilder.append(',').append(' '); } Object value = valueArray.get(index); if (value instanceof String) { stringBuilder.append('\'').append(((String) value).replaceAll("'", "''")).append('\''); } else { stringBuilder.append(value); } } } return stringBuilder.append(')'); } /** * Randomly generate the WHERE clause of the query, may return empty string. * * @return all predicates. */ private StringBuilder generatePredicates() { int numPredicates = Math.min(RANDOM.nextInt(MAX_NUM_PREDICATES + 1), _singleValueDimensionColumns.size()); if (numPredicates == 0) { return null; } StringBuilder stringBuilder = new StringBuilder(WHERE); Collections.shuffle(_singleValueDimensionColumns); for (int i = 0; i < numPredicates; i++) { if (i != 0) { stringBuilder.append(AND); } String dimensionName = _singleValueDimensionColumns.get(i); switch (RANDOM.nextInt(3)) { case 0: stringBuilder.append(generateComparisonPredicate(dimensionName)); break; case 1: stringBuilder.append(generateBetweenPredicate(dimensionName)); break; default: stringBuilder.append(generateInPredicate(dimensionName)); break; } } return stringBuilder; } /** * Randomly generate the GROUP BY section, may return empty string. * * @return group by section. */ private StringBuilder generateGroupBys() { int numPredicates = Math.min(RANDOM.nextInt(MAX_NUM_GROUP_BYS + 1), _singleValueDimensionColumns.size()); if (numPredicates == 0) { return null; } StringBuilder stringBuilder = new StringBuilder(GROUP_BY); Collections.shuffle(_singleValueDimensionColumns); for (int i = 0; i < numPredicates; i++) { if (i != 0) { stringBuilder.append(',').append(' '); } stringBuilder.append(_singleValueDimensionColumns.get(i)); } return stringBuilder; } /** * Combine the various sections to form the overall query. * * @param aggregations aggregation section. * @param predicates predicate section. * @param groupBys group by section. * @return overall query. */ private String buildQuery(StringBuilder aggregations, StringBuilder predicates, StringBuilder groupBys) { StringBuilder stringBuilder = new StringBuilder(SELECT).append(aggregations); stringBuilder.append(FROM).append(_tableName); if (predicates != null) { stringBuilder.append(predicates); } if (groupBys != null) { stringBuilder.append(groupBys); } return stringBuilder.toString(); } /** * Randomly generate a query. * * @return Return the generated query. */ public String nextQuery() { return buildQuery(generateAggregations(), generatePredicates(), generateGroupBys()); } /** * Given star tree segments directory and number of queries, generate star tree queries. * Usage: StarTreeQueryGenerator starTreeSegmentsDirectory numQueries * * @param args arguments. */ public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: StarTreeQueryGenerator starTreeSegmentsDirectory numQueries"); return; } // Get segment metadata for the first segment to get table name and verify query is fit for star tree. File segmentsDir = new File(args[0]); Preconditions.checkState(segmentsDir.exists()); Preconditions.checkState(segmentsDir.isDirectory()); File[] segments = segmentsDir.listFiles(); Preconditions.checkNotNull(segments); File segment = segments[0]; IndexSegment indexSegment = Loaders.IndexSegment.load(segment, ReadMode.heap); SegmentMetadata segmentMetadata = indexSegment.getSegmentMetadata(); String tableName = segmentMetadata.getTableName(); // Set up star tree query generator. int numQueries = Integer.parseInt(args[1]); SegmentInfoProvider infoProvider = new SegmentInfoProvider(args[0]); StarTreeQueryGenerator generator = new StarTreeQueryGenerator(tableName, infoProvider.getSingleValueDimensionColumns(), infoProvider.getMetricColumns(), infoProvider.getSingleValueDimensionValuesMap()); Pql2Compiler compiler = new Pql2Compiler(); for (int i = 0; i < numQueries; i++) { String query = generator.nextQuery(); System.out.println(query); // Verify that query is fit for star tree. BrokerRequest brokerRequest = compiler.compileToBrokerRequest(query); Preconditions.checkState( RequestUtils.isFitForStarTreeIndex(segmentMetadata, RequestUtils.generateFilterQueryTree(brokerRequest), brokerRequest)); } } }