/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.tools.query.comparison; import com.linkedin.pinot.tools.scan.query.GroupByOperator; import com.linkedin.pinot.tools.scan.query.QueryResponse; import com.linkedin.pinot.tools.scan.query.ScanBasedQueryProcessor; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.codehaus.jackson.map.ObjectMapper; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class QueryComparison { private static final Logger LOGGER = LoggerFactory.getLogger(QueryComparison.class); private static final double EPSILON = 0.00001; private static final String SELECTION_RESULTS = "selectionResults"; private static final String AGGREGATION_RESULTS = "aggregationResults"; private static final String NUM_DOCS_SCANNED = "numDocsScanned"; private static final String FUNCTION = "function"; private static final String VALUE = "value"; private static final String COLUMNS = "columns"; private static final String RESULTS = "results"; private static final String GROUP_BY_COLUMNS = "groupByColumns"; private static final String GROUP_BY_RESULT = "groupByResult"; private static final String GROUP = "group"; private static final String TIME_USED_MS = "timeUsedMs"; private static final String EXCEPTIONS = "exceptions"; private static boolean _compareNumDocs = true; private File _segmentsDir; private File _queryFile; private File _resultFile; private final QueryComparisonConfig _config; private ClusterStarter _clusterStarter; public QueryComparison(QueryComparisonConfig config) { _config = config; _queryFile = new File(config.getQueryFile()); if (!_queryFile.exists() || !_queryFile.isFile()) { LOGGER.error("Invalid query file: {}", _queryFile.getName()); return; } String segmentDir = config.getSegmentsDir(); String results = config.getResultFile(); if (segmentDir == null && results == null) { LOGGER.error("Neither segments directory nor expected results file specified"); return; } _segmentsDir = (segmentDir != null) ? new File(segmentDir) : null; _resultFile = (results != null) ? new File(results) : null; if (_segmentsDir != null && (!_segmentsDir.exists() || !_segmentsDir.isDirectory())) { LOGGER.error("Invalid segments directory: {}", _segmentsDir.getName()); return; } if (_config.getPerfMode() && (_config.getPerfUrl() == null)) { LOGGER.error("Must specify perf url in perf mode"); return; } } private void run() throws Exception { startCluster(); // For function mode, compare response with the expected response. if (_config.getFunctionMode()) { runFunctionMode(); } // For perf mode, count the client side response time. if (_config.getPerfMode()) { runPerfMode(); } } private void runFunctionMode() throws Exception { BufferedReader resultReader = null; ScanBasedQueryProcessor scanBasedQueryProcessor = null; try ( BufferedReader queryReader = new BufferedReader(new InputStreamReader(new FileInputStream(_queryFile), "UTF8")) ) { if (_resultFile == null) { scanBasedQueryProcessor = new ScanBasedQueryProcessor(_segmentsDir.getAbsolutePath()); } else { resultReader = new BufferedReader(new InputStreamReader(new FileInputStream(_resultFile), "UTF8")); } int passed = 0; int total = 0; String query; while ((query = queryReader.readLine()) != null) { if (query.isEmpty() || query.startsWith("#")) { continue; } JSONObject expectedJson = null; try { if (resultReader != null) { expectedJson = new JSONObject(resultReader.readLine()); } else { QueryResponse expectedResponse = scanBasedQueryProcessor.processQuery(query); expectedJson = new JSONObject(new ObjectMapper().writeValueAsString(expectedResponse)); } } catch (Exception e) { LOGGER.error("Comparison FAILED: Id: {} Exception caught while getting expected response for query: '{}'", total, query, e); } JSONObject actualJson = null; if (expectedJson != null) { try { actualJson = new JSONObject(_clusterStarter.query(query)); } catch (Exception e) { LOGGER.error("Comparison FAILED: Id: {} Exception caught while running query: '{}'", total, query, e); } } if (expectedJson != null && actualJson != null) { try { if (compare(actualJson, expectedJson)) { passed++; LOGGER.info("Comparison PASSED: Id: {} actual Time: {} ms expected Time: {} ms Docs Scanned: {}", total, actualJson.get(TIME_USED_MS), expectedJson.get(TIME_USED_MS), actualJson.get(NUM_DOCS_SCANNED)); LOGGER.debug("actual Response: {}", actualJson); LOGGER.debug("expected Response: {}", expectedJson); } else { LOGGER.error("Comparison FAILED: Id: {} query: {}", query); LOGGER.info("actual Response: {}", actualJson); LOGGER.info("expected Response: {}", expectedJson); } } catch (Exception e) { LOGGER.error("Comparison FAILED: Id: {} Exception caught while comparing query: '{}' actual response: {}, expected response: {}", total, query, actualJson, expectedJson, e); } } total++; } LOGGER.info("Total {} out of {} queries passed.", passed, total); } finally { if (resultReader != null) { resultReader.close(); } } } private void runPerfMode() throws Exception { try ( BufferedReader queryReader = new BufferedReader(new InputStreamReader(new FileInputStream(_queryFile), "UTF8")) ) { String query; while ((query = queryReader.readLine()) != null) { if (query.isEmpty() || query.startsWith("#")) { continue; } int clientTime = _clusterStarter.perfQuery(query); LOGGER.info("Client side response time: {} ms", clientTime); } } } private void startCluster() throws Exception { _clusterStarter = new ClusterStarter(_config); if (_config.getStartCluster()) { LOGGER.info("Bringing up Pinot Cluster"); _clusterStarter.start(); LOGGER.info("Pinot Cluster is now up"); } else { LOGGER.info("Skipping cluster setup as specified in the config file."); } } public enum ComparisonStatus { PASSED, EMPTY, FAILED } public static ComparisonStatus compareWithEmpty(JSONObject actualJson, JSONObject expectedJson) throws JSONException { if (actualJson.getJSONArray(EXCEPTIONS).length() != 0) { return ComparisonStatus.FAILED; } // If no records found, nothing to compare. if ((actualJson.getInt(NUM_DOCS_SCANNED) == 0) && expectedJson.getInt(NUM_DOCS_SCANNED) == 0) { LOGGER.info("Empty results, nothing to compare."); return ComparisonStatus.EMPTY; } if (!compareSelection(actualJson, expectedJson)) { return ComparisonStatus.FAILED; } if (!compareAggregation(actualJson, expectedJson)) { return ComparisonStatus.FAILED; } return ComparisonStatus.PASSED; } public static boolean compare(JSONObject actualJson, JSONObject expectedJson) throws JSONException { ComparisonStatus comparisonStatus = compareWithEmpty(actualJson, expectedJson); return !comparisonStatus.equals(ComparisonStatus.FAILED); } /** * Some clients (eg Star Tree) may have different num docs scanned, but produce the same result. * This compare method will ignore comparing number of documents scanned. * @param actualJson * @param expectedJson * @param compareNumDocs * @return */ public static boolean compare(JSONObject actualJson, JSONObject expectedJson, boolean compareNumDocs) throws JSONException { _compareNumDocs = compareNumDocs; return compare(actualJson, expectedJson); } public static void setCompareNumDocs(boolean compareNumDocs) { _compareNumDocs = compareNumDocs; } public static boolean compareAggregation(JSONObject actualJson, JSONObject expectedJson) throws JSONException { if (!actualJson.has(AGGREGATION_RESULTS) && !expectedJson.has(AGGREGATION_RESULTS)) { return true; } JSONArray actualAggregation = actualJson.getJSONArray(AGGREGATION_RESULTS); if (actualAggregation.length() == 0) { return !expectedJson.has(AGGREGATION_RESULTS); } if (_compareNumDocs && !compareNumDocsScanned(actualJson, expectedJson)) { return false; } if (actualAggregation.getJSONObject(0).has(GROUP_BY_RESULT)) { return compareAggregationGroupBy(actualJson, expectedJson); } JSONArray expectedAggregation = expectedJson.getJSONArray(AGGREGATION_RESULTS); return compareAggregationArrays(actualAggregation, expectedAggregation); } private static boolean compareAggregationArrays(JSONArray actualAggregation, JSONArray expectedAggregation) throws JSONException { Map<String, Double> map = new HashMap<>(); for (int i = 0; i < expectedAggregation.length(); ++i) { JSONObject object = expectedAggregation.getJSONObject(i); map.put(object.getString(FUNCTION).toLowerCase(), Double.valueOf(object.getString(VALUE))); } for (int i = 0; i < actualAggregation.length(); ++i) { JSONObject object = actualAggregation.getJSONObject(i); String function = object.getString(FUNCTION).toLowerCase(); String valueString = object.getString(VALUE); if (!isNumeric(valueString)) { LOGGER.warn("Found non-numeric value for aggregation ignoring Function: {} Value: {}", function, valueString); continue; } Double value = Double.valueOf(valueString); if (!map.containsKey(function)) { LOGGER.error("expected Response does not contain function {}", function); return false; } Double expectedValue = map.get(function); if (!fuzzyEqual(value, expectedValue)) { LOGGER.error("Aggregation value mismatch for function {}, {}, {}", function, value, expectedValue); return false; } } return true; } private static boolean compareAggregationGroupBy(JSONObject actualJson, JSONObject expectedJson) throws JSONException { JSONArray actualGroupByResults = actualJson.getJSONArray(AGGREGATION_RESULTS); JSONArray expectedGroupByResults = expectedJson.getJSONArray(AGGREGATION_RESULTS); int numActualGroupBy = actualGroupByResults.length(); int numExpectedGroupBy = expectedGroupByResults.length(); // Build map based on function (function_column name) to match individual entries. Map<String, Integer> functionMap = new HashMap<>(); for (int i = 0; i < numExpectedGroupBy; ++i) { JSONObject expectedAggr = expectedGroupByResults.getJSONObject(i); String expectedFunction = expectedAggr.getString(FUNCTION).toLowerCase(); functionMap.put(expectedFunction, i); } for (int i = 0; i < numActualGroupBy; ++i) { JSONObject actualAggr = actualGroupByResults.getJSONObject(i); String actualFunction = actualAggr.getString(FUNCTION).toLowerCase(); if (!functionMap.containsKey(actualFunction)) { LOGGER.error("Missing group by function in expected response: {}", actualFunction); return false; } JSONObject expectedAggr = expectedGroupByResults.getJSONObject(functionMap.get(actualFunction)); if (!compareGroupByColumns(actualAggr, expectedAggr)) { return false; } if (!compareAggregationValues(actualAggr, expectedAggr)) { return false; } } return true; } private static List<Object> jsonArrayToList(JSONArray jsonArray) throws JSONException { List<Object> list = new ArrayList<>(); for (int i = 0; i < jsonArray.length(); ++i) { list.add(jsonArray.get(i)); } return list; } private static boolean compareAggregationValues(JSONObject actualAggr, JSONObject expectedAggr) throws JSONException { JSONArray actualResult = actualAggr.getJSONArray(GROUP_BY_RESULT); JSONArray expectedResult = expectedAggr.getJSONArray(GROUP_BY_RESULT); Map<GroupByOperator, Double> expectedMap = new HashMap<>(); for (int i = 0; i < expectedResult.length(); ++i) { List<Object> group = jsonArrayToList(expectedResult.getJSONObject(i).getJSONArray(GROUP)); GroupByOperator groupByOperator = new GroupByOperator(group); expectedMap.put(groupByOperator, expectedResult.getJSONObject(i).getDouble(VALUE)); } for (int i = 0; i < actualResult.length(); ++i) { List<Object> group = jsonArrayToList(actualResult.getJSONObject(i).getJSONArray(GROUP)); GroupByOperator groupByOperator = new GroupByOperator(group); double actualValue = actualResult.getJSONObject(i).getDouble(VALUE); if (!expectedMap.containsKey(groupByOperator)) { LOGGER.error("Missing group by value for group: {}", group); return false; } double expectedValue = expectedMap.get(groupByOperator); if (!fuzzyEqual(actualValue, expectedValue)) { LOGGER.error("Aggregation group by value mis-match: actual: {}, expected: {}", actualValue, expectedValue); return false; } } return true; } private static boolean compareGroupByColumns(JSONObject actualAggr, JSONObject expectedAggr) throws JSONException { JSONArray actualCols = actualAggr.getJSONArray(GROUP_BY_COLUMNS); JSONArray expectedCols = expectedAggr.getJSONArray(GROUP_BY_COLUMNS); if (!compareLists(actualCols, expectedCols, null)) { return false; } return true; } private static boolean compareSelection(JSONObject actualJson, JSONObject expectedJson) throws JSONException { if (!actualJson.has(SELECTION_RESULTS) && !expectedJson.has(SELECTION_RESULTS)) { return true; } /* We cannot compare numDocsScanned in selection because when we just return part of the selection result (has a low limit), this number can change over time. */ JSONObject actualSelection = actualJson.getJSONObject(SELECTION_RESULTS); JSONObject expectedSelection = expectedJson.getJSONObject(SELECTION_RESULTS); Map<Integer, Integer> expectedToActualColMap = new HashMap<Integer, Integer>(actualSelection.getJSONArray(COLUMNS).length()); return compareLists(actualSelection.getJSONArray(COLUMNS), expectedSelection.getJSONArray(COLUMNS), expectedToActualColMap) && compareSelectionRows(actualSelection.getJSONArray(RESULTS), expectedSelection.getJSONArray(RESULTS), expectedToActualColMap); } private static boolean compareSelectionRows(JSONArray actualRows, JSONArray expectedRows, Map<Integer, Integer> expectedToActualColMap) throws JSONException { final int numActualRows = actualRows.length(); final int numExpectedRows = expectedRows.length(); if (numActualRows > numExpectedRows) { LOGGER.error("In selection, number of actual rows: {} more than expected rows: {}", numActualRows, numExpectedRows); return false; } Map<String, Integer> expectedRowMap = new HashMap<>(numExpectedRows); for (int i = 0; i < numExpectedRows; i++) { String serialized = serializeRow(expectedRows.getJSONArray(i), expectedToActualColMap); Integer count = expectedRowMap.get(serialized); if (count == null) { expectedRowMap.put(serialized, 1); } else { expectedRowMap.put(serialized, count + 1); } } for (int i = 0; i < numActualRows; i++) { String serialized = serializeRow(actualRows.getJSONArray(i), null); Integer count = expectedRowMap.get(serialized); if (count == null || count == 0) { LOGGER.error("Cannot find match for row {} in actual result", i); return false; } expectedRowMap.put(serialized, count - 1); } return true; } private static String serializeRow(JSONArray row, Map<Integer, Integer> expectedToActualColMap) throws JSONException { StringBuilder sb = new StringBuilder(); final int numCols = row.length(); sb.append(numCols).append('_'); for (int i = 0; i < numCols; i++) { String toAppend; if (expectedToActualColMap == null) { toAppend = row.getString(i); } else { toAppend = row.getString(expectedToActualColMap.get(i)); } // For number value, uniform the format and do fuzzy comparison try { double numValue = Double.parseDouble(toAppend); sb.append((int) (numValue * 100)).append('_'); } catch (NumberFormatException e) { sb.append(toAppend).append('_'); } } return sb.toString(); } private static boolean isNumeric(String str) { if (str == null) { return false; } try { Double.parseDouble(str); } catch (NumberFormatException nfe) { return false; } return true; } private static boolean compareLists(JSONArray actualList, JSONArray expectedList, Map<Integer, Integer> expectedToActualColMap) throws JSONException { int actualSize = actualList.length(); int expectedSize = expectedList.length(); if (actualSize != expectedSize) { LOGGER.error("Number of columns mis-match: actual: {} expected: {}", actualSize, expectedSize); return false; } if (expectedToActualColMap == null) { for (int i = 0; i < expectedList.length(); ++i) { String actualColumn = actualList.getString(i); String expectedColumn = expectedList.getString(i); if (!actualColumn.equals(expectedColumn)) { LOGGER.error("Column name mis-match: actual: {} expected: {}", actualColumn, expectedColumn); return false; } } } else { for (int i = 0; i < expectedList.length(); i++) { boolean found = false; final String expectedColumn = expectedList.getString(i); for (int j = 0; j < actualList.length(); j++) { if (expectedColumn.equals(actualList.getString(j))) { expectedToActualColMap.put(i, j); found = true; break; } } if (!found) { LOGGER.error("Column name " + expectedColumn + " not found in actual"); return false; } } } return true; } private static boolean compareNumDocsScanned(JSONObject actualJson, JSONObject expectedJson) throws JSONException { int actualDocs = actualJson.getInt(NUM_DOCS_SCANNED); int expectedDocs = expectedJson.getInt(NUM_DOCS_SCANNED); if (actualDocs != expectedDocs) { LOGGER.error("Mis-match in number of docs scanned: actual: {} expected: {}", actualDocs, expectedDocs); return false; } return true; } private static boolean fuzzyEqual(double d1, double d2) { if (d1 == d2) { return true; } if (Math.abs(d1 - d2) < EPSILON) { return true; } // For really large numbers, use relative error. if (d1 != 0 && ((Math.abs(d1 - d2)) / Math.abs(d1)) < EPSILON) { return true; } return false; } public static void main(String[] args) { if (args.length != 1) { LOGGER.error("Incorrect number of arguments."); LOGGER.info("Usage: <exec> <config-file>"); System.exit(1); } try { QueryComparisonConfig config = new QueryComparisonConfig(new File(args[0])); QueryComparison queryComparison = new QueryComparison(config); queryComparison.run(); } catch (Exception e) { LOGGER.error("Exception caught, aborting query comparison: ", e); } System.exit(0); } }