/** * Copyright (C) 2014-2015 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.thirdeye.hadoop.config; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.concurrent.TimeUnit; import org.apache.commons.lang.StringUtils; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.linkedin.pinot.common.data.TimeGranularitySpec.TimeFormat; import com.linkedin.thirdeye.hadoop.config.DimensionSpec; import com.linkedin.thirdeye.hadoop.config.MetricSpec; import com.linkedin.thirdeye.hadoop.config.MetricType; import com.linkedin.thirdeye.hadoop.config.SplitSpec; import com.linkedin.thirdeye.hadoop.config.TimeGranularity; import com.linkedin.thirdeye.hadoop.config.TimeSpec; import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec; import com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec; /** * This class represents the configs required by the thirdeye-hadoop jobs * @param collection - name of the pinot table * @param dimensions - list of dimensionSpecs for dimensions * @param metrics - list of metricSpecs for metrics * @param time - time spec * @topKWhitelist - metric threshold, topk and whitelist spec * @split - split spec */ public final class ThirdEyeConfig { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(new YAMLFactory()); private static final String FIELD_SEPARATOR = ","; private static final String CONFIG_JOINER = "."; private static final String DEFAULT_TIME_TYPE = "HOURS"; private static final String DEFAULT_TIME_SIZE = "1"; private static final String DEFAULT_TIME_FORMAT = TimeFormat.EPOCH.toString(); private String collection; private List<DimensionSpec> dimensions; private List<MetricSpec> metrics; private TimeSpec inputTime = new TimeSpec(); private TimeSpec time = new TimeSpec(); private TopkWhitelistSpec topKWhitelist = new TopkWhitelistSpec(); private SplitSpec split = new SplitSpec(); public ThirdEyeConfig() { } public ThirdEyeConfig(String collection, List<DimensionSpec> dimensions, List<MetricSpec> metrics, TimeSpec inputTime, TimeSpec time, TopkWhitelistSpec topKWhitelist, SplitSpec split) { this.collection = collection; this.dimensions = dimensions; this.metrics = metrics; this.inputTime = inputTime; this.time = time; this.topKWhitelist = topKWhitelist; this.split = split; } public String getCollection() { return collection; } public List<DimensionSpec> getDimensions() { return dimensions; } @JsonIgnore public List<String> getDimensionNames() { List<String> results = new ArrayList<>(dimensions.size()); for (DimensionSpec dimensionSpec : dimensions) { results.add(dimensionSpec.getName()); } return results; } public List<MetricSpec> getMetrics() { return metrics; } @JsonIgnore public List<String> getMetricNames() { List<String> results = new ArrayList<>(metrics.size()); for (MetricSpec metricSpec : metrics) { results.add(metricSpec.getName()); } return results; } public TimeSpec getInputTime() { return inputTime; } public TimeSpec getTime() { return time; } public TopkWhitelistSpec getTopKWhitelist() { return topKWhitelist; } /** * Returns a set of all dimensions which have either topk or whitelist config * @return */ @JsonIgnore public Set<String> getTransformDimensions() { Set<String> transformDimensions = new HashSet<>(); if (topKWhitelist != null) { List<TopKDimensionToMetricsSpec> topk = topKWhitelist.getTopKDimensionToMetricsSpec(); if (topk != null) { for (TopKDimensionToMetricsSpec spec : topk) { transformDimensions.add(spec.getDimensionName()); } } } return transformDimensions; } public SplitSpec getSplit() { return split; } public String encode() throws IOException { return OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(this); } public static class Builder { private String collection; private List<DimensionSpec> dimensions; private List<MetricSpec> metrics; private TimeSpec inputTime = new TimeSpec(); private TimeSpec time = new TimeSpec(); private TopkWhitelistSpec topKWhitelist = new TopkWhitelistSpec(); private SplitSpec split = new SplitSpec(); public String getCollection() { return collection; } public Builder setCollection(String collection) { this.collection = collection; return this; } public List<DimensionSpec> getDimensions() { return dimensions; } public Builder setDimensions(List<DimensionSpec> dimensions) { this.dimensions = dimensions; return this; } public List<MetricSpec> getMetrics() { return metrics; } public Builder setMetrics(List<MetricSpec> metrics) { this.metrics = metrics; return this; } public TimeSpec getInputTime() { return inputTime; } public TimeSpec getTime() { return time; } public Builder setTime(TimeSpec time) { this.time = time; return this; } public TopkWhitelistSpec getTopKWhitelist() { return topKWhitelist; } public Builder setTopKWhitelist(TopkWhitelistSpec topKWhitelist) { this.topKWhitelist = topKWhitelist; return this; } public SplitSpec getSplit() { return split; } public Builder setSplit(SplitSpec split) { this.split = split; return this; } public ThirdEyeConfig build() throws Exception { if (collection == null) { throw new IllegalArgumentException("Must provide collection"); } if (dimensions == null || dimensions.isEmpty()) { throw new IllegalArgumentException("Must provide dimension names"); } if (metrics == null || metrics.isEmpty()) { throw new IllegalArgumentException("Must provide metric specs"); } return new ThirdEyeConfig(collection, dimensions, metrics, inputTime, time, topKWhitelist, split); } } public static ThirdEyeConfig decode(InputStream inputStream) throws IOException { return OBJECT_MAPPER.readValue(inputStream, ThirdEyeConfig.class); } /** * Creates a ThirdEyeConfig object from the Properties object * @param props * @return */ public static ThirdEyeConfig fromProperties(Properties props) { String collection = getCollectionFromProperties(props); List<DimensionSpec> dimensions = getDimensionFromProperties(props); List<MetricSpec> metrics = getMetricsFromProperties(props); TimeSpec inputTime = getInputTimeFromProperties(props); TimeSpec time = getTimeFromProperties(props); SplitSpec split = getSplitFromProperties(props); TopkWhitelistSpec topKWhitelist = getTopKWhitelistFromProperties(props); ThirdEyeConfig thirdeyeConfig = new ThirdEyeConfig(collection, dimensions, metrics, inputTime, time, topKWhitelist, split); return thirdeyeConfig; } private static TopkWhitelistSpec getTopKWhitelistFromProperties(Properties props) { TopkWhitelistSpec topKWhitelist = null; Map<String, Double> threshold = getThresholdFromProperties(props); List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = getTopKDimensionToMetricsSpecFromProperties(props); Map<String, String> whitelist = getWhitelistFromProperties(props); if (threshold != null || topKDimensionToMetricsSpec != null || whitelist != null) { topKWhitelist = new TopkWhitelistSpec(); topKWhitelist.setThreshold(threshold); topKWhitelist.setTopKDimensionToMetricsSpec(topKDimensionToMetricsSpec); topKWhitelist.setWhitelist(whitelist); } return topKWhitelist; } private static Map<String, String> getWhitelistFromProperties(Properties props) { Map<String, String> whitelist = null; String whitelistDimensions = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), null); if (whitelistDimensions != null && whitelistDimensions.split(FIELD_SEPARATOR).length > 0) { whitelist = new HashMap<>(); for (String dimension : whitelistDimensions.split(FIELD_SEPARATOR)) { String dimensionWhitelist = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + CONFIG_JOINER + dimension); whitelist.put(dimension, dimensionWhitelist); } } return whitelist; } private static List<TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpecFromProperties(Properties props) { List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = null; String topKDimensionNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), null); if (StringUtils.isNotEmpty(topKDimensionNames) && topKDimensionNames.split(FIELD_SEPARATOR).length > 0) { topKDimensionToMetricsSpec = new ArrayList<>(); for (String dimension : topKDimensionNames.split(FIELD_SEPARATOR)) { String[] topKDimensionMetrics = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + CONFIG_JOINER + dimension) .split(FIELD_SEPARATOR); String[] topKDimensionKValues = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + CONFIG_JOINER + dimension) .split(FIELD_SEPARATOR); if (topKDimensionMetrics.length != topKDimensionKValues.length) { throw new IllegalStateException("Number of topk metric names and kvalues should be same for a dimension"); } Map<String, Integer> topk = new HashMap<>(); for (int i = 0; i < topKDimensionMetrics.length; i++) { topk.put(topKDimensionMetrics[i], Integer.parseInt(topKDimensionKValues[i])); } topKDimensionToMetricsSpec.add(new TopKDimensionToMetricsSpec(dimension, topk)); } } return topKDimensionToMetricsSpec; } private static Map<String, Double> getThresholdFromProperties(Properties props) { Map<String, Double> threshold = null; String thresholdMetricNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES.toString(), null); String metricThresholdValues = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString(), null); if (thresholdMetricNames != null && metricThresholdValues != null) { String[] thresholdMetrics = thresholdMetricNames.split(FIELD_SEPARATOR); String[] thresholdValues = metricThresholdValues.split(FIELD_SEPARATOR); if (thresholdMetrics.length != thresholdValues.length) { throw new IllegalStateException("Number of threshold metric names should be same as threshold values"); } threshold = new HashMap<>(); for (int i = 0; i < thresholdMetrics.length; i++) { threshold.put(thresholdMetrics[i], Double.parseDouble(thresholdValues[i])); } } return threshold; } private static SplitSpec getSplitFromProperties(Properties props) { SplitSpec split = null; String splitThreshold = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_SPLIT_THRESHOLD.toString(), null); if (splitThreshold != null) { String splitOrder = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_SPLIT_ORDER.toString(), null); List<String> splitOrderList = null; if (splitOrder != null) { splitOrderList = Arrays.asList(splitOrder.split(FIELD_SEPARATOR)); } split = new SplitSpec(Integer.parseInt(splitThreshold), splitOrderList); } return split; } private static TimeSpec getTimeFromProperties(Properties props) { String timeColumnName = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString()); String timeColumnType = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_TYPE.toString(), DEFAULT_TIME_TYPE); String timeColumnSize = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_SIZE.toString(), DEFAULT_TIME_SIZE); TimeGranularity timeGranularity = new TimeGranularity(Integer.parseInt(timeColumnSize), TimeUnit.valueOf(timeColumnType)); String timeFormat = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_FORMAT.toString(), DEFAULT_TIME_FORMAT); TimeSpec time = new TimeSpec(timeColumnName, timeGranularity, timeFormat); return time; } private static TimeSpec getInputTimeFromProperties(Properties props) { TimeSpec inputTime = null; String timeColumnName = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString()); String timeColumnType = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_TYPE.toString(), null); String timeColumnSize = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_SIZE.toString(), null); String timeFormat = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_FORMAT.toString(), DEFAULT_TIME_FORMAT); if (timeColumnType != null && timeColumnSize != null) { TimeGranularity timeGranularity = new TimeGranularity(Integer.parseInt(timeColumnSize), TimeUnit.valueOf(timeColumnType)); inputTime = new TimeSpec(timeColumnName, timeGranularity, timeFormat); } return inputTime; } private static List<MetricSpec> getMetricsFromProperties(Properties props) { List<MetricSpec> metrics = new ArrayList<>(); String[] metricNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()).split(FIELD_SEPARATOR); String[] metricTypes = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()).split(FIELD_SEPARATOR); if (metricNames.length != metricTypes.length) { throw new IllegalStateException("Number of metric names provided " + "should be same as number of metric types"); } for (int i = 0; i < metricNames.length; i++) { metrics.add(new MetricSpec(metricNames[i], MetricType.valueOf(metricTypes[i]))); } return metrics; } private static List<DimensionSpec> getDimensionFromProperties(Properties props) { List<DimensionSpec> dimensions = new ArrayList<>(); String[] dimensionNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()).split(FIELD_SEPARATOR); for (String dimension : dimensionNames) { dimensions.add(new DimensionSpec(dimension)); } return dimensions; } private static String getCollectionFromProperties(Properties props) { String collection = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString()); return collection; } private static String getAndCheck(Properties props, String propName) { String propValue = props.getProperty(propName); if (propValue == null) { throw new IllegalArgumentException(propName + " required property"); } return propValue; } private static String getAndCheck(Properties props, String propName, String defaultValue) { String propValue = props.getProperty(propName, defaultValue); return propValue; } }