package com.linkedin.thirdeye.client.diffsummary; import com.linkedin.thirdeye.constant.MetricAggFunction; import com.linkedin.thirdeye.datalayer.util.DaoProviderUtil; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.jfree.util.Log; import org.joda.time.DateTime; import com.google.common.collect.Lists; import com.linkedin.thirdeye.client.MetricExpression; import com.linkedin.thirdeye.client.MetricFunction; import com.linkedin.thirdeye.client.ThirdEyeCacheRegistry; import com.linkedin.thirdeye.client.ThirdEyeClient; import com.linkedin.thirdeye.client.ThirdEyeRequest; import com.linkedin.thirdeye.client.ThirdEyeRequest.ThirdEyeRequestBuilder; import com.linkedin.thirdeye.client.ThirdEyeResponse; import com.linkedin.thirdeye.client.cache.QueryCache; import com.linkedin.thirdeye.common.ThirdEyeConfiguration; import com.linkedin.thirdeye.dashboard.ThirdEyeDashboardConfiguration; import com.linkedin.thirdeye.dashboard.Utils; import com.linkedin.thirdeye.dashboard.views.diffsummary.Summary;
/**
 * This class generates query requests to the backend database and retrieves the data for the summary algorithm.
 *
 * The generated requests are organized in the following tree structure:
 *   Root level by GroupBy dimensions.
 *   Mid level by "baseline" or "current"; the "baseline" request is ordered before the "current" request.
 *   Leaf level by metric functions; this level is handled by the request itself, i.e., a request can gather
 *   multiple metric functions at the same time.
 * The generated requests are stored in a List. Because of the tree structure, the requests that belong to the same
 * timeline (baseline or current) are located together. Then, the requests that belong to the same GroupBy dimension
 * are located together.
*/ public class PinotThirdEyeSummaryClient implements OLAPDataBaseClient { private final static DateTime NULL_DATETIME = new DateTime(); private final static int TIME_OUT_VALUE = 120; private final static TimeUnit TIME_OUT_UNIT = TimeUnit.SECONDS; private QueryCache queryCache; private String collection; private DateTime baselineStartInclusive = NULL_DATETIME; private DateTime baselineEndExclusive = NULL_DATETIME; private DateTime currentStartInclusive = NULL_DATETIME; private DateTime currentEndExclusive = NULL_DATETIME; private MetricExpression metricExpression; private List<MetricFunction> metricFunctions; private MetricExpressionsContext context; public PinotThirdEyeSummaryClient(QueryCache queryCache) { this.queryCache = queryCache; } public PinotThirdEyeSummaryClient(ThirdEyeClient thirdEyeClient) { this(new QueryCache(thirdEyeClient, Executors.newFixedThreadPool(10))); } @Override public void setCollection(String collection) { this.collection = collection; } @Override public void setMetricExpression(MetricExpression metricExpression) { this.metricExpression = metricExpression; metricFunctions = metricExpression.computeMetricFunctions(); if (metricFunctions.size() > 1) { context = new MetricExpressionsContext(); } else { context = null; } } @Override public void setBaselineStartInclusive(DateTime dateTime) { baselineStartInclusive = dateTime; } @Override public void setBaselineEndExclusive(DateTime dateTime) { baselineEndExclusive = dateTime; } @Override public void setCurrentStartInclusive(DateTime dateTime) { currentStartInclusive = dateTime; } @Override public void setCurrentEndExclusive(DateTime dateTime) { currentEndExclusive = dateTime; } @Override public Row getTopAggregatedValues() throws Exception { List<String> groupBy = Collections.emptyList(); List<ThirdEyeRequest> timeOnTimeBulkRequests = constructTimeOnTimeBulkRequests(groupBy); Row row = constructAggregatedValues(null, timeOnTimeBulkRequests).get(0).get(0); return row; } @Override public 
List<List<Row>> getAggregatedValuesOfDimension(Dimensions dimensions) throws Exception { List<ThirdEyeRequest> timeOnTimeBulkRequests = new ArrayList<>(); for (int level = 0; level < dimensions.size(); ++level) { List<String> groupBy = Lists.newArrayList(dimensions.get(level)); timeOnTimeBulkRequests.addAll(constructTimeOnTimeBulkRequests(groupBy)); } List<List<Row>> rows = constructAggregatedValues(dimensions, timeOnTimeBulkRequests); return rows; } @Override public List<List<Row>> getAggregatedValuesOfLevels(Dimensions dimensions) throws Exception { List<ThirdEyeRequest> timeOnTimeBulkRequests = new ArrayList<>(); for (int level = 0; level < dimensions.size() + 1; ++level) { List<String> groupBy = Lists.newArrayList(dimensions.groupByStringsAtLevel(level)); timeOnTimeBulkRequests.addAll(constructTimeOnTimeBulkRequests(groupBy)); } List<List<Row>> rows = constructAggregatedValues(dimensions, timeOnTimeBulkRequests); return rows; } /** * Returns the baseline and current requests for the given GroupBy dimensions. * * @param groupBy the dimensions to do GroupBy queries * @return Baseline and Current requests. 
*/ private List<ThirdEyeRequest> constructTimeOnTimeBulkRequests(List<String> groupBy) { List<ThirdEyeRequest> requests = new ArrayList<>();; // baseline requests ThirdEyeRequestBuilder builder = ThirdEyeRequest.newBuilder(); builder.setMetricFunctions(metricFunctions); builder.setGroupBy(groupBy); builder.setStartTimeInclusive(baselineStartInclusive); builder.setEndTimeExclusive(baselineEndExclusive); ThirdEyeRequest baselineRequest = builder.build("baseline"); requests.add(baselineRequest); // current requests builder = ThirdEyeRequest.newBuilder(); builder.setMetricFunctions(metricFunctions); builder.setGroupBy(groupBy); builder.setStartTimeInclusive(currentStartInclusive); builder.setEndTimeExclusive(currentEndExclusive); ThirdEyeRequest currentRequest = builder.build("current"); requests.add(currentRequest); return requests; } /** * @throws Exception Throws exceptions when no useful data is retrieved, i.e., time out, failed to connect * to the backend database, no non-zero data returned from the database, etc. */ private List<List<Row>> constructAggregatedValues(Dimensions dimensions, List<ThirdEyeRequest> bulkRequests) throws Exception { Map<ThirdEyeRequest, Future<ThirdEyeResponse>> queryResponses = queryCache.getQueryResultsAsync(bulkRequests); List<List<Row>> res = new ArrayList<>(); for (int i = 0; i < bulkRequests.size(); ) { ThirdEyeRequest baselineRequest = bulkRequests.get(i++); ThirdEyeRequest currentRequest = bulkRequests.get(i++); ThirdEyeResponse baselineResponses = queryResponses.get(baselineRequest).get(TIME_OUT_VALUE, TIME_OUT_UNIT); ThirdEyeResponse currentResponses = queryResponses.get(currentRequest).get(TIME_OUT_VALUE, TIME_OUT_UNIT); if (baselineResponses.getNumRows() == 0 || currentResponses.getNumRows() == 0) { throw new Exception("Failed to retrieve results with this request: " + (baselineResponses.getNumRows() == 0 ? 
baselineRequest : currentRequest)); } Map<List<String>, Row> rowTable = new HashMap<>(); buildMetricFunctionOrExpressionsRows(dimensions, baselineResponses, rowTable, true); buildMetricFunctionOrExpressionsRows(dimensions, currentResponses, rowTable, false); if (rowTable.size() == 0) { throw new Exception("Failed to retrieve non-zero results with these requests: " + baselineRequest + ", " + currentRequest); } List<Row> rows = new ArrayList<>(rowTable.values()); res.add(rows); } return res; } /** * Returns a list of rows. The value of each row is evaluated and no further processing is needed. * @param dimensions dimensions of the response * @param response the response from backend database * @param rowTable the storage for rows * @param isBaseline true if the response is for baseline values */ private void buildMetricFunctionOrExpressionsRows(Dimensions dimensions, ThirdEyeResponse response, Map<List<String>, Row> rowTable, boolean isBaseline) { for (int rowIdx = 0; rowIdx < response.getNumRows(); ++rowIdx) { double value = 0d; // If the metric expression is a single metric function, then we get the value immediately if (metricFunctions.size() <= 1) { value = response.getRow(rowIdx).getMetrics().get(0); } else { // Otherwise, we need to evaluate the expression context.reset(); for (int metricFuncIdx = 0; metricFuncIdx < metricFunctions.size(); ++metricFuncIdx) { double contextValue = response.getRow(rowIdx).getMetrics().get(metricFuncIdx); context.set(metricFunctions.get(metricFuncIdx).getMetricName(), contextValue); } try { value = MetricExpression.evaluateExpression(metricExpression, context.getContext()); } catch (Exception e) { Log.warn(e); } } if (Double.compare(0d, value) < 0 && !Double.isInfinite(value)) { List<String> dimensionValues = response.getRow(rowIdx).getDimensions(); Row row = rowTable.get(dimensionValues); if (row == null) { row = new Row(); row.setDimensions(dimensions); row.setDimensionValues(new DimensionValues(dimensionValues)); 
rowTable.put(dimensionValues, row); } if (isBaseline) { row.baselineValue = value; } else { row.currentValue = value; } } } } private class MetricExpressionsContext { private Map<String, Double> context; public MetricExpressionsContext () { context = new HashMap<>(); for (MetricFunction metricFunction : metricFunctions) { context.put(metricFunction.getMetricName(), 0d); } } public void set(String metricName, double value) { context.put(metricName, value); } public Map<String, Double> getContext() { return context; } public void reset() { for (Map.Entry<String, Double> entry : context.entrySet()) { entry.setValue(0d); } } } }