/**
* Copyright (C) 2014-2015 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.thirdeye.hadoop.util;
import java.io.IOException;
import java.io.InputStream;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.data.TimeFieldSpec;
import com.linkedin.pinot.common.data.TimeGranularitySpec;
import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
import com.linkedin.thirdeye.hadoop.config.MetricSpec;
import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
/**
* This class contains the methods needed to transform
* a ThirdEyeConfig into a Pinot Schema
*/
public class ThirdeyePinotSchemaUtils {

  private static final Logger LOGGER = LoggerFactory.getLogger(ThirdeyePinotSchemaUtils.class);

  /**
   * Transforms the given thirdeye config into a pinot schema.
   * <ul>
   * <li>Every configured dimension becomes a single-valued STRING dimension column.</li>
   * <li>Every dimension listed in {@code thirdeyeConfig.getTransformDimensions()}
   * (either specified as topk or whitelist) gets an additional column whose name is
   * the dimension name with {@link ThirdEyeConstants#TOPK_DIMENSION_SUFFIX} appended.</li>
   * <li>Every configured metric becomes a single-valued metric column of the metric's type.</li>
   * <li>The {@link ThirdEyeConstants#AUTO_METRIC_COUNT} metric is added as a LONG with a
   * default null value of 1 if it is not already among the configured metrics.</li>
   * <li>A time column is added using the config's time granularity, format and column name,
   * and the schema is named after the config's collection.</li>
   * </ul>
   *
   * @param thirdeyeConfig the config to convert
   * @return the pinot schema built from the config
   */
  public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
    Schema schema = new Schema();

    Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
    for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
      String dimensionName = dimensionSpec.getName();
      schema.addField(dimensionName,
          configure(new DimensionFieldSpec(), dimensionName, DataType.STRING));

      // A missing set of transform dimensions is treated as "nothing to transform".
      if (transformDimensions != null && transformDimensions.contains(dimensionName)) {
        String transformedName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
        schema.addField(transformedName,
            configure(new DimensionFieldSpec(), transformedName, DataType.STRING));
      }
    }

    boolean countIncluded = false;
    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
      String metricName = metricSpec.getName();
      if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
        countIncluded = true;
      }
      // The thirdeye metric type is mapped to the pinot DataType constant of the same name.
      DataType metricType = DataType.valueOf(metricSpec.getType().toString());
      schema.addField(metricName, configure(new MetricFieldSpec(), metricName, metricType));
    }

    if (!countIncluded) {
      String countMetricName = ThirdEyeConstants.AUTO_METRIC_COUNT;
      FieldSpec countSpec = configure(new MetricFieldSpec(), countMetricName, DataType.LONG);
      // Set after the data type so this explicit default is the last value written.
      countSpec.setDefaultNullValue(1);
      schema.addField(countMetricName, countSpec);
    }

    // Incoming and outgoing granularity are identical; each gets its own instance.
    TimeGranularitySpec incoming = createTimeGranularitySpec(thirdeyeConfig);
    TimeGranularitySpec outgoing = createTimeGranularitySpec(thirdeyeConfig);
    schema.addField(thirdeyeConfig.getTime().getColumnName(),
        new TimeFieldSpec(incoming, outgoing));

    schema.setSchemaName(thirdeyeConfig.getCollection());
    return schema;
  }

  /**
   * Reads a thirdeye config from the given path on the file system obtained from a
   * default hadoop {@link Configuration}, logs it, and transforms it into a pinot schema.
   *
   * @param configPath path of the thirdeye config file
   * @return the pinot schema built from the decoded config
   * @throws IOException if the file system cannot be obtained, or the config cannot be
   *         opened, decoded or closed
   */
  public static Schema createSchema(String configPath) throws IOException {
    // The FileSystem itself is not closed here: FileSystem.get may return a cached
    // instance that is shared with other callers.
    FileSystem fs = FileSystem.get(new Configuration());

    ThirdEyeConfig thirdeyeConfig;
    // Close the config stream once decoding finishes, whether or not it succeeds.
    try (InputStream configStream = fs.open(new Path(configPath))) {
      thirdeyeConfig = ThirdEyeConfig.decode(configStream);
    }

    LOGGER.info("{}", thirdeyeConfig);
    return createSchema(thirdeyeConfig);
  }

  /** Sets name and data type on the given field spec, marks it single-valued, and returns it. */
  private static FieldSpec configure(FieldSpec fieldSpec, String name, DataType dataType) {
    fieldSpec.setName(name);
    fieldSpec.setDataType(dataType);
    fieldSpec.setSingleValueField(true);
    return fieldSpec;
  }

  /** Builds a new LONG time granularity spec from the time section of the config. */
  private static TimeGranularitySpec createTimeGranularitySpec(ThirdEyeConfig thirdeyeConfig) {
    return new TimeGranularitySpec(DataType.LONG,
        thirdeyeConfig.getTime().getTimeGranularity().getSize(),
        thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
        thirdeyeConfig.getTime().getTimeFormat(),
        thirdeyeConfig.getTime().getColumnName());
  }
}