package com.linkedin.thirdeye.util;
import com.google.common.collect.HashMultimap;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.TimeGranularitySpec.TimeFormat;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.data.TimeFieldSpec;
import com.linkedin.pinot.common.data.TimeGranularitySpec;
import com.linkedin.thirdeye.api.DimensionMap;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import org.apache.commons.lang.StringUtils;
import org.joda.time.Period;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import com.linkedin.thirdeye.api.CollectionSchema;
import com.linkedin.thirdeye.api.DimensionSpec;
import com.linkedin.thirdeye.api.MetricSpec;
import com.linkedin.thirdeye.api.TimeGranularity;
import com.linkedin.thirdeye.api.TimeSpec;
import com.linkedin.thirdeye.client.DAORegistry;
import com.linkedin.thirdeye.client.MetricExpression;
import com.linkedin.thirdeye.client.MetricFunction;
import com.linkedin.thirdeye.client.ThirdEyeCacheRegistry;
import com.linkedin.thirdeye.client.cache.MetricDataset;
import com.linkedin.thirdeye.datalayer.bao.MetricConfigManager;
import com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO;
import com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO;
import com.linkedin.thirdeye.datalayer.pojo.AlertConfigBean.COMPARE_MODE;
import com.linkedin.thirdeye.datalayer.pojo.DashboardConfigBean;
import com.linkedin.thirdeye.datalayer.pojo.DatasetConfigBean;
import com.linkedin.thirdeye.datalayer.pojo.MetricConfigBean;
public abstract class ThirdEyeUtils {
private static final Logger LOG = LoggerFactory.getLogger(ThirdEyeUtils.class);
private static final String FILTER_VALUE_ASSIGNMENT_SEPARATOR = "=";
private static final String FILTER_CLAUSE_SEPARATOR = ";";
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final DAORegistry DAO_REGISTRY = DAORegistry.getInstance();
private static final ThirdEyeCacheRegistry CACHE_REGISTRY = ThirdEyeCacheRegistry.getInstance();
private static final String TWO_DECIMALS_FORMAT = "##.##";
private static final String MAX_DECIMALS_FORMAT = "##.#####";
private static final String DECIMALS_FORMAT_TOKEN = "#";
private ThirdEyeUtils () {
}
public static Multimap<String, String> getFilterSet(String filters) {
Multimap<String, String> filterSet = ArrayListMultimap.create();
if (StringUtils.isNotBlank(filters)) {
String[] filterClauses = filters.split(FILTER_CLAUSE_SEPARATOR);
for (String filterClause : filterClauses) {
String[] values = filterClause.split(FILTER_VALUE_ASSIGNMENT_SEPARATOR, 2);
if (values.length != 2) {
throw new IllegalArgumentException("Filter values assigments should in pairs: " + filters);
}
filterSet.put(values[0], values[1]);
}
}
return filterSet;
}
/**
* Returns or modifies a filter that can be for querying the results corresponding to the given dimension map.
*
* For example, if a dimension map = {country=IN,page_name=front_page}, then the two entries will be added or
* over-written to the given filter.
*
* Note that if the given filter contains an entry: country=["IN", "US", "TW",...], then this entry is replaced by
* country=IN.
*
* @param dimensionMap the dimension map to add to the filter
* @param filterToDecorate if it is null, a new filter will be created; otherwise, it is modified.
* @return a filter that is modified according to the given dimension map.
*/
public static Multimap<String, String> getFilterSetFromDimensionMap(DimensionMap dimensionMap,
Multimap<String, String> filterToDecorate) {
if (filterToDecorate == null) {
filterToDecorate = HashMultimap.create();
}
for (Map.Entry<String, String> entry : dimensionMap.entrySet()) {
String dimensionName = entry.getKey();
String dimensionValue = entry.getValue();
// If dimension value is "OTHER", then we need to get all data and calculate "OTHER" part.
// In order to reproduce the data for "OTHER", the filter should remain as is.
if ( !dimensionValue.equalsIgnoreCase("OTHER") ) {
// Only add the specific dimension value to the filter because other dimension values will not be used
filterToDecorate.removeAll(dimensionName);
filterToDecorate.put(dimensionName, dimensionValue);
}
}
return filterToDecorate;
}
public static String convertMultiMapToJson(Multimap<String, String> multimap)
throws JsonProcessingException {
Map<String, Collection<String>> map = multimap.asMap();
return OBJECT_MAPPER.writeValueAsString(map);
}
public static Multimap<String, String> convertToMultiMap(String json) {
ArrayListMultimap<String, String> multimap = ArrayListMultimap.create();
if (json == null) {
return multimap;
}
try {
TypeReference<Map<String, ArrayList<String>>> valueTypeRef =
new TypeReference<Map<String, ArrayList<String>>>() {
};
Map<String, ArrayList<String>> map;
map = OBJECT_MAPPER.readValue(json, valueTypeRef);
for (Map.Entry<String, ArrayList<String>> entry : map.entrySet()) {
ArrayList<String> valueList = entry.getValue();
ArrayList<String> trimmedList = new ArrayList<>();
for (String value : valueList) {
trimmedList.add(value.trim());
}
multimap.putAll(entry.getKey(), trimmedList);
}
return multimap;
} catch (IOException e) {
LOG.error("Error parsing json:{} message:{}", json, e.getMessage());
}
return multimap;
}
public static String getSortedFiltersFromMultiMap(Multimap<String, String> filterMultiMap) {
Set<String> filterKeySet = filterMultiMap.keySet();
ArrayList<String> filterKeyList = new ArrayList<String>(filterKeySet);
Collections.sort(filterKeyList);
StringBuilder sb = new StringBuilder();
for (String filterKey : filterKeyList) {
ArrayList<String> values = new ArrayList<String>(filterMultiMap.get(filterKey));
Collections.sort(values);
for (String value : values) {
sb.append(filterKey);
sb.append(FILTER_VALUE_ASSIGNMENT_SEPARATOR);
sb.append(value);
sb.append(FILTER_CLAUSE_SEPARATOR);
}
}
return StringUtils.chop(sb.toString());
}
public static String getSortedFilters(String filters) {
Multimap<String, String> filterMultiMap = getFilterSet(filters);
String sortedFilters = getSortedFiltersFromMultiMap(filterMultiMap);
if (StringUtils.isBlank(sortedFilters)) {
return null;
}
return sortedFilters;
}
public static String getSortedFiltersFromJson(String filterJson) {
Multimap<String, String> filterMultiMap = convertToMultiMap(filterJson);
String sortedFilters = getSortedFiltersFromMultiMap(filterMultiMap);
if (StringUtils.isBlank(sortedFilters)) {
return null;
}
return sortedFilters;
}
private static String getTimeFormatString(DatasetConfigDTO datasetConfig) {
String timeFormat = datasetConfig.getTimeFormat();
if (timeFormat.startsWith(TimeFormat.SIMPLE_DATE_FORMAT.toString())) {
timeFormat = getSDFPatternFromTimeFormat(timeFormat);
}
return timeFormat;
}
/**
* Returns the time spec of the buckets (data points) in the specified dataset config. For additive dataset, this
* method returns the same time spec as getTimestampTimeSpecFromDatasetConfig; however, for non-additive dataset,
* this method return the time spec for buckets (data points) instead of the one for the timestamp in the backend
* database. For example, the data points of a non-additive dataset could be 5-MINUTES granularity, but timestamp's
* granularity could be 1-Milliseconds. For additive dataset, the discrepancy is not an issue, but it could be
* a problem for non-additive dataset.
*
* @param datasetConfig the given dataset config
*
* @return the time spec of the buckets (data points) in the specified dataset config.
*/
public static TimeSpec getTimeSpecFromDatasetConfig(DatasetConfigDTO datasetConfig) {
String timeFormat = getTimeFormatString(datasetConfig);
TimeSpec timespec = new TimeSpec(datasetConfig.getTimeColumn(),
new TimeGranularity(datasetConfig.bucketTimeGranularity()), timeFormat);
return timespec;
}
/**
* Returns the time spec of the timestamp in the specified dataset config. The timestamp time spec is mainly used
* for constructing the queries to backend database. For most use case, this method returns the same time spec as
* getTimeSpecFromDatasetConfig(); however, if the dataset is non-additive, then getTimeSpecFromDatasetConfig
* should be used unless the application is related to database queries.
*
* @param datasetConfig the given dataset config
*
* @return the time spec of the timestamp in the specified dataset config.
*/
public static TimeSpec getTimestampTimeSpecFromDatasetConfig(DatasetConfigDTO datasetConfig) {
String timeFormat = getTimeFormatString(datasetConfig);
TimeSpec timespec = new TimeSpec(datasetConfig.getTimeColumn(),
new TimeGranularity(datasetConfig.getTimeDuration(), datasetConfig.getTimeUnit()), timeFormat);
return timespec;
}
private static String getSDFPatternFromTimeFormat(String timeFormat) {
String pattern = timeFormat;
String[] tokens = timeFormat.split(":");
if (tokens.length == 2) {
pattern = tokens[1];
}
return pattern;
}
public static MetricExpression getMetricExpressionFromMetricConfig(MetricConfigDTO metricConfig) {
String expression = null;
if (metricConfig.isDerived()) {
expression = metricConfig.getDerivedMetricExpression();
} else {
expression = MetricConfigBean.DERIVED_METRIC_ID_PREFIX + metricConfig.getId();
}
MetricExpression metricExpression = new MetricExpression(metricConfig.getName(), expression, metricConfig.getDataset());
return metricExpression;
}
// TODO: Write parser instead of looking for occurrence of every metric
public static String substituteMetricIdsForMetrics(String metricExpression, String dataset) {
MetricConfigManager metricConfigDAO = DAO_REGISTRY.getMetricConfigDAO();
List<MetricConfigDTO> metricConfigs = metricConfigDAO.findByDataset(dataset);
for (MetricConfigDTO metricConfig : metricConfigs) {
if (metricConfig.isDerived()) {
continue;
}
String metricName = metricConfig.getName();
metricExpression = metricExpression.replaceAll(metricName,
MetricConfigBean.DERIVED_METRIC_ID_PREFIX + metricConfig.getId());
}
return metricExpression;
}
public static String getDerivedMetricExpression(String metricExpressionName, String dataset) throws ExecutionException {
String derivedMetricExpression = null;
MetricDataset metricDataset = new MetricDataset(metricExpressionName, dataset);
MetricConfigDTO metricConfig = CACHE_REGISTRY.getMetricConfigCache().get(metricDataset);
if (metricConfig.isDerived()) {
derivedMetricExpression = metricConfig.getDerivedMetricExpression();
} else {
derivedMetricExpression = MetricConfigBean.DERIVED_METRIC_ID_PREFIX + metricConfig.getId();
}
return derivedMetricExpression;
}
public static Map<String, Double> getMetricThresholdsMap(List<MetricFunction> metricFunctions) {
Map<String, Double> metricThresholds = new HashMap<>();
for (MetricFunction metricFunction : metricFunctions) {
String derivedMetricExpression = metricFunction.getMetricName();
String metricId = derivedMetricExpression.replaceAll(MetricConfigBean.DERIVED_METRIC_ID_PREFIX, "");
MetricConfigDTO metricConfig = DAO_REGISTRY.getMetricConfigDAO().findById(Long.valueOf(metricId));
metricThresholds.put(derivedMetricExpression, metricConfig.getRollupThreshold());
}
return metricThresholds;
}
public static String getMetricNameFromFunction(MetricFunction metricFunction) {
String metricId = metricFunction.getMetricName().replace(MetricConfigBean.DERIVED_METRIC_ID_PREFIX, "");
MetricConfigDTO metricConfig = DAO_REGISTRY.getMetricConfigDAO().findById(Long.valueOf(metricId));
return metricConfig.getName();
}
public static Schema createSchema(CollectionSchema collectionSchema) {
Schema schema = new Schema();
for (DimensionSpec dimensionSpec : collectionSchema.getDimensions()) {
FieldSpec fieldSpec = new DimensionFieldSpec();
String dimensionName = dimensionSpec.getName();
fieldSpec.setName(dimensionName);
fieldSpec.setDataType(DataType.STRING);
fieldSpec.setSingleValueField(true);
schema.addField(dimensionName, fieldSpec);
}
for (MetricSpec metricSpec : collectionSchema.getMetrics()) {
FieldSpec fieldSpec = new MetricFieldSpec();
String metricName = metricSpec.getName();
fieldSpec.setName(metricName);
fieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
fieldSpec.setSingleValueField(true);
schema.addField(metricName, fieldSpec);
}
TimeSpec timeSpec = collectionSchema.getTime();
String timeFormat = timeSpec.getFormat().equals("sinceEpoch") ? TimeFormat.EPOCH.toString()
: TimeFormat.SIMPLE_DATE_FORMAT.toString() + ":" + timeSpec.getFormat();
TimeGranularitySpec incoming =
new TimeGranularitySpec(DataType.LONG,
timeSpec.getDataGranularity().getSize(),
timeSpec.getDataGranularity().getUnit(),
timeFormat,
timeSpec.getColumnName());
TimeGranularitySpec outgoing =
new TimeGranularitySpec(DataType.LONG,
timeSpec.getDataGranularity().getSize(),
timeSpec.getDataGranularity().getUnit(),
timeFormat,
timeSpec.getColumnName());
schema.addField(timeSpec.getColumnName(), new TimeFieldSpec(incoming, outgoing));
schema.setSchemaName(collectionSchema.getCollection());
return schema;
}
public static String constructMetricAlias(String datasetName, String metricName) {
String alias = datasetName + MetricConfigBean.ALIAS_JOINER + metricName;
return alias;
}
public static String getDefaultDashboardName(String dataset) {
String dashboardName = DashboardConfigBean.DEFAULT_DASHBOARD_PREFIX + dataset;
return dashboardName;
}
//By default, query only offline, unless dataset has been marked as realtime
public static String computeTableName(String collection) {
String dataset = null;
try {
DatasetConfigDTO datasetConfig = CACHE_REGISTRY.getDatasetConfigCache().get(collection);
dataset = collection + DatasetConfigBean.DATASET_OFFLINE_PREFIX;
if (datasetConfig.isRealtime()) {
dataset = collection;
}
} catch (ExecutionException e) {
LOG.error("Exception in getting dataset name {}", collection, e);
}
return dataset;
}
public static Period getbaselineOffsetPeriodByMode(COMPARE_MODE compareMode) {
int numWeeksAgo = 1;
switch (compareMode) {
case Wo2W:
numWeeksAgo = 2;
break;
case Wo3W:
numWeeksAgo = 3;
break;
case Wo4W:
numWeeksAgo = 4;
break;
case WoW:
default:
numWeeksAgo = 1;
break;
}
return new Period(0, 0, 0, 7 * numWeeksAgo, 0, 0, 0, 0);
}
public static DatasetConfigDTO getDatasetConfigFromName(String dataset) {
DatasetConfigDTO datasetConfig = null;
try {
datasetConfig = CACHE_REGISTRY.getDatasetConfigCache().get(dataset);
} catch (ExecutionException e) {
LOG.error("Exception in getting dataset config {} from cache", dataset, e);
}
return datasetConfig;
}
public static String getDatasetFromMetricFunction(MetricFunction metricFunction) {
MetricConfigDTO metricConfig = getMetricConfigFromId(metricFunction.getMetricId());
return metricConfig.getDataset();
}
public static MetricConfigDTO getMetricConfigFromId(Long metricId) {
MetricConfigDTO metricConfig = null;
if (metricId != null) {
metricConfig = DAO_REGISTRY.getMetricConfigDAO().findById(metricId);
}
return metricConfig;
}
public static MetricConfigDTO getMetricConfigFromNameAndDataset(String metricName, String dataset) {
MetricConfigDTO metricConfig = null;
try {
metricConfig = CACHE_REGISTRY.getMetricConfigCache().get(new MetricDataset(metricName, dataset));
} catch (ExecutionException e) {
LOG.error("Exception while fetching metric by name {} and dataset {}", metricName, dataset, e);
}
return metricConfig;
}
/**
* Get rounded double value, according to the value of the double.
* Max rounding will be upto 4 decimals
* For values gte 0.1, use ##.## (eg. 123, 2.5, 1.26, 0.5, 0.162)
* For values lt 0.1 and gte 0.01, use ##.### (eg. 0.08, 0.071, 0.0123)
* For values lt 0.01 and gte 0.001, use ##.#### (eg. 0.001, 0.00367)
* This function ensures we don't prematurely round off double values to a fixed format, and make it 0.00 or lose out information
* @param value
* @return
*/
public static String getRoundedValue(double value) {
StringBuffer decimalFormatBuffer = new StringBuffer(TWO_DECIMALS_FORMAT);
double compareValue = 0.1;
while (value > 0 && value < compareValue && !decimalFormatBuffer.toString().equals(MAX_DECIMALS_FORMAT)) {
decimalFormatBuffer.append(DECIMALS_FORMAT_TOKEN);
compareValue = compareValue * 0.1;
}
DecimalFormat decimalFormat = new DecimalFormat(decimalFormatBuffer.toString());
return decimalFormat.format(value);
}
}