package com.linkedin.thirdeye.client.pinot;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import org.apache.commons.lang3.StringUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Joiner;
import com.google.common.collect.Multimap;
import com.linkedin.thirdeye.api.TimeGranularity;
import com.linkedin.thirdeye.api.TimeSpec;
import com.linkedin.thirdeye.client.MetricFunction;
import com.linkedin.thirdeye.client.ThirdEyeRequest;
import com.linkedin.thirdeye.dashboard.Utils;
import com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO;
import com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO;
import com.linkedin.thirdeye.util.ThirdEyeUtils;
/**
* Util class for generated PQL queries (pinot).
*/
public class PqlUtils {
private static final Joiner AND = Joiner.on(" AND ");
private static final Joiner COMMA = Joiner.on(",");
private static final Joiner EQUALS = Joiner.on(" = ");
private static final Logger LOGGER = LoggerFactory.getLogger(PqlUtils.class);
private static final int DEFAULT_TOP = 100000;
/**
* Returns sql to calculate the sum of all raw metrics required for <tt>request</tt>, grouped by
* time within the requested date range. </br>
* Due to the summation, all metric column values can be assumed to be doubles.
* @throws ExecutionException
*/
public static String getPql(ThirdEyeRequest request, MetricFunction metricFunction, TimeSpec dataTimeSpec)
throws ExecutionException {
// TODO handle request.getFilterClause()
return getPql(metricFunction,
request.getStartTimeInclusive(), request.getEndTimeExclusive(), request.getFilterSet(),
request.getGroupBy(), request.getGroupByTimeGranularity(), dataTimeSpec);
}
private static String getPql(MetricFunction metricFunction, DateTime startTime,
DateTime endTimeExclusive, Multimap<String, String> filterSet, List<String> groupBy,
TimeGranularity timeGranularity, TimeSpec dataTimeSpec) throws ExecutionException {
MetricConfigDTO metricConfig = ThirdEyeUtils.getMetricConfigFromId(metricFunction.getMetricId());
String dataset = metricFunction.getDataset();
StringBuilder sb = new StringBuilder();
String selectionClause = getSelectionClause(metricConfig, metricFunction);
String tableName = ThirdEyeUtils.computeTableName(dataset);
sb.append("SELECT ").append(selectionClause).append(" FROM ").append(tableName);
String betweenClause = getBetweenClause(startTime, endTimeExclusive, dataTimeSpec, dataset);
sb.append(" WHERE ").append(betweenClause);
String dimensionWhereClause = getDimensionWhereClause(filterSet);
if (StringUtils.isNotBlank(dimensionWhereClause)) {
sb.append(" AND ").append(dimensionWhereClause);
}
String groupByClause = getDimensionGroupByClause(groupBy, timeGranularity, dataTimeSpec);
if (StringUtils.isNotBlank(groupByClause)) {
sb.append(" ").append(groupByClause);
sb.append(" TOP ").append(DEFAULT_TOP);
}
return sb.toString();
}
private static String getSelectionClause(MetricConfigDTO metricConfig, MetricFunction metricFunction) {
StringBuilder builder = new StringBuilder();
String metricName = null;
if (metricFunction.getMetricName().equals("*")) {
metricName = "*";
} else {
metricName = metricConfig.getName();
}
builder.append(metricFunction.getFunctionName()).append("(").append(metricName).append(")");
return builder.toString();
}
/**
* Returns pqls to handle tables where metric names are a single dimension column,
* and the metric values are all in a single value column
* @param request
* @param dataTimeSpec
* @param collectionConfig
* @return
* @throws Exception
*/
public static String getMetricAsDimensionPql(ThirdEyeRequest request, MetricFunction metricFunction,
TimeSpec dataTimeSpec, DatasetConfigDTO datasetConfig) throws Exception {
// select sum(metric_values_column) from collection
// where time_clause and metric_names_column=function.getMetricName
String metricValuesColumn = datasetConfig.getMetricValuesColumn();
String metricNamesColumn = datasetConfig.getMetricNamesColumn();
String metricAsDimensionPql = getMetricAsDimensionPql(metricFunction,
request.getStartTimeInclusive(), request.getEndTimeExclusive(), request.getFilterSet(),
request.getGroupBy(), request.getGroupByTimeGranularity(), dataTimeSpec,
metricValuesColumn, metricNamesColumn);
return metricAsDimensionPql;
}
private static String getMetricAsDimensionPql(MetricFunction metricFunction, DateTime startTime,
DateTime endTimeExclusive, Multimap<String, String> filterSet, List<String> groupBy,
TimeGranularity timeGranularity, TimeSpec dataTimeSpec, String metricValuesColumn,
String metricNamesColumn) throws ExecutionException {
MetricConfigDTO metricConfig = ThirdEyeUtils.getMetricConfigFromId(metricFunction.getMetricId());
String dataset = metricFunction.getDataset();
StringBuilder sb = new StringBuilder();
String selectionClause = getMetricAsDimensionSelectionClause(metricFunction, metricValuesColumn);
String tableName = ThirdEyeUtils.computeTableName(dataset);
sb.append("SELECT ").append(selectionClause).append(" FROM ").append(tableName);
String betweenClause = getBetweenClause(startTime, endTimeExclusive, dataTimeSpec, dataset);
sb.append(" WHERE ").append(betweenClause);
String metricWhereClause = getMetricWhereClause(metricConfig, metricFunction, metricNamesColumn);
sb.append(metricWhereClause);
String dimensionWhereClause = getDimensionWhereClause(filterSet);
if (StringUtils.isNotBlank(dimensionWhereClause)) {
sb.append(" AND ").append(dimensionWhereClause);
}
String groupByClause = getDimensionGroupByClause(groupBy, timeGranularity, dataTimeSpec);
if (StringUtils.isNotBlank(groupByClause)) {
sb.append(" ").append(groupByClause);
sb.append(" TOP ").append(DEFAULT_TOP);
}
return sb.toString();
}
private static String getMetricWhereClause(MetricConfigDTO metricConfig, MetricFunction metricFunction, String metricNameColumn) {
StringBuilder builder = new StringBuilder();
if (!metricFunction.getMetricName().equals("*")) {
builder.append(" AND ");
builder.append(String.format("%s='%s'", metricNameColumn, metricConfig.getName()));
}
return builder.toString();
}
private static String getMetricAsDimensionSelectionClause(MetricFunction metricFunction, String metricValueColumn) {
StringBuilder builder = new StringBuilder();
String metricName = metricValueColumn;
if (metricFunction.getMetricName().equals("*")) {
metricName = "*";
}
builder.append(metricFunction.getFunctionName()).append("(").append(metricName).append(")");
return builder.toString();
}
static String getBetweenClause(DateTime start, DateTime endExclusive, TimeSpec timeFieldSpec, String dataset)
throws ExecutionException {
TimeGranularity dataGranularity = timeFieldSpec.getDataGranularity();
long startMillis = start.getMillis();
long endMillis = endExclusive.getMillis();
long dataGranularityMillis = dataGranularity.toMillis();
String timeField = timeFieldSpec.getColumnName();
String timeFormat = timeFieldSpec.getFormat();
if (timeFormat == null || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) {
// Shrink start and end as per data granularity
long startAlignmentDelta = startMillis % dataGranularityMillis;
if (startAlignmentDelta != 0) {
long startMillisAligned = startMillis + dataGranularityMillis - startAlignmentDelta;
start = new DateTime(startMillisAligned);
}
long endAligmentDelta = endMillis % dataGranularityMillis;
if (endAligmentDelta != 0) {
long endMillisAligned = endMillis - endAligmentDelta;
endExclusive = new DateTime(endMillisAligned);
}
}
String startQueryTime;
String endQueryTimeExclusive;
if (timeFormat == null || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) {
long startInConvertedUnits = dataGranularity.convertToUnit(start.getMillis());
long endInConvertedUnits = dataGranularity.convertToUnit(endExclusive.getMillis());
startQueryTime = String.valueOf(startInConvertedUnits);
endQueryTimeExclusive = (endInConvertedUnits == startInConvertedUnits + 1) ?
startQueryTime : String.valueOf(endInConvertedUnits);
} else {
DateTimeFormatter inputDataDateTimeFormatter = DateTimeFormat.forPattern(timeFormat).withZone(Utils.getDataTimeZone(dataset));
startQueryTime = inputDataDateTimeFormatter.print(start);
endQueryTimeExclusive = inputDataDateTimeFormatter.print(endExclusive);
}
if (startQueryTime.equals(endQueryTimeExclusive)) {
return String.format(" %s = %s", timeField, startQueryTime);
} else {
return String.format(" %s >= %s AND %s < %s", timeField, startQueryTime, timeField,
endQueryTimeExclusive);
}
}
private static String getDimensionWhereClause(Multimap<String, String> dimensionValues) {
List<String> components = new ArrayList<>();
for (Map.Entry<String, Collection<String>> entry : dimensionValues.asMap().entrySet()) {
String key = entry.getKey();
Collection<String> values = entry.getValue();
String component;
if (values.isEmpty()) {
continue;
} else if (values.size() == 1) {
component = EQUALS.join(key, String.format("'%s'", values.iterator().next().trim()));
} else {
List<String> quotedValues = new ArrayList<>(values.size());
for (String value : values) {
quotedValues.add(String.format("'%s'", value.trim()));
}
component = String.format("%s IN (%s)", key, COMMA.join(quotedValues));
}
components.add(component);
}
if (components.isEmpty()) {
return null;
}
return AND.join(components);
}
private static String getDimensionGroupByClause(List<String> groupBy,
TimeGranularity aggregationGranulity, TimeSpec timeSpec) {
String timeColumnName = timeSpec.getColumnName();
List<String> groups = new LinkedList<String>();
if (aggregationGranulity != null && !groups.contains(timeColumnName)) {
groups.add(timeColumnName);
}
if (groupBy != null) {
groups.addAll(groupBy);
}
if (groups.isEmpty()) {
return "";
}
return String.format("GROUP BY %s", COMMA.join(groups));
}
public static String getDataTimeRangeSql(String dataset, String timeColumnName) {
return String.format("select min(%s), max(%s) from %s", timeColumnName, timeColumnName,
dataset);
}
}