/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.common.data;

import com.google.common.base.Preconditions;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.FieldSpec.FieldType;
import com.linkedin.pinot.common.utils.EqualityUtils;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.codehaus.jackson.annotate.JsonIgnore;
import org.codehaus.jackson.annotate.JsonIgnoreProperties;
import org.codehaus.jackson.map.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * The <code>Schema</code> class is defined for each table to describe the details of the table's fields (columns).
 * <p>Three field types are supported: DIMENSION, METRIC, TIME.
 * ({@link com.linkedin.pinot.common.data.DimensionFieldSpec}, {@link com.linkedin.pinot.common.data.MetricFieldSpec},
 * {@link com.linkedin.pinot.common.data.TimeFieldSpec})
 * <p>For each field, a {@link com.linkedin.pinot.common.data.FieldSpec} is defined to provide the details of the
 * field.
 * <p>There can be multiple DIMENSION or METRIC fields, but at most one TIME field.
 * <p>In Pinot, data is stored using five <code>DataType</code>s: INT, LONG, FLOAT, DOUBLE, STRING. All other
 * <code>DataType</code>s are converted to one of these.
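 *
 * <p>Example usage (a minimal sketch; the schema name, column names, and types below are illustrative only):
 * <pre>
 *   Schema schema = new Schema.SchemaBuilder()
 *       .setSchemaName("myTable")
 *       .addSingleValueDimension("country", DataType.STRING)
 *       .addMetric("clicks", DataType.LONG)
 *       .addTime("daysSinceEpoch", TimeUnit.DAYS, DataType.INT)
 *       .build();
 * </pre>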
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public final class Schema {
  private static final Logger LOGGER = LoggerFactory.getLogger(Schema.class);
  private static final ObjectMapper MAPPER = new ObjectMapper();

  private String _schemaName;
  private final List<DimensionFieldSpec> _dimensionFieldSpecs = new ArrayList<>();
  private final List<MetricFieldSpec> _metricFieldSpecs = new ArrayList<>();
  private TimeFieldSpec _timeFieldSpec;

  private final Map<String, FieldSpec> _fieldSpecMap = new HashMap<>();
  private final Set<String> _dimensionSet = new HashSet<>();
  private final Set<String> _metricSet = new HashSet<>();
  private final List<String> _dimensionList = new ArrayList<>();
  private final List<String> _metricList = new ArrayList<>();

  // Lazily computed JSON representation; see getJSONSchema().
  private transient String _jsonSchema;

  @Nonnull
  public static Schema fromFile(@Nonnull File schemaFile) throws IOException {
    return MAPPER.readValue(schemaFile, Schema.class);
  }

  @Nonnull
  public static Schema fromString(@Nonnull String schemaString) throws IOException {
    return MAPPER.readValue(schemaString, Schema.class);
  }

  @Nonnull
  public static Schema fromInputStream(@Nonnull InputStream schemaInputStream) throws IOException {
    return MAPPER.readValue(schemaInputStream, Schema.class);
  }

  @Nonnull
  public String getSchemaName() {
    return _schemaName;
  }

  public void setSchemaName(@Nonnull String schemaName) {
    _schemaName = schemaName;
  }

  @Nonnull
  public List<DimensionFieldSpec> getDimensionFieldSpecs() {
    return _dimensionFieldSpecs;
  }

  public void setDimensionFieldSpecs(@Nonnull List<DimensionFieldSpec> dimensionFieldSpecs) {
    Preconditions.checkState(_dimensionFieldSpecs.isEmpty());
    for (DimensionFieldSpec dimensionFieldSpec : dimensionFieldSpecs) {
      addField(dimensionFieldSpec);
    }
  }

  @Nonnull
  public List<MetricFieldSpec> getMetricFieldSpecs() {
    return _metricFieldSpecs;
  }

  public void setMetricFieldSpecs(@Nonnull List<MetricFieldSpec> metricFieldSpecs) {
    Preconditions.checkState(_metricFieldSpecs.isEmpty());
    for (MetricFieldSpec metricFieldSpec : metricFieldSpecs) {
      addField(metricFieldSpec);
    }
  }

  @Nullable
  public TimeFieldSpec getTimeFieldSpec() {
    return _timeFieldSpec;
  }

  public void setTimeFieldSpec(@Nullable TimeFieldSpec timeFieldSpec) {
    if (timeFieldSpec != null) {
      addField(timeFieldSpec);
    }
  }

  public void addField(@Nonnull FieldSpec fieldSpec) {
    Preconditions.checkNotNull(fieldSpec);
    String columnName = fieldSpec.getName();
    Preconditions.checkNotNull(columnName);
    Preconditions.checkState(!_fieldSpecMap.containsKey(columnName),
        "Field spec already exists for column: " + columnName);

    FieldType fieldType = fieldSpec.getFieldType();
    switch (fieldType) {
      case DIMENSION:
        // Track each dimension name once, preserving insertion order.
        if (!_dimensionSet.contains(columnName)) {
          _dimensionSet.add(columnName);
          _dimensionList.add(columnName);
        }
        _dimensionFieldSpecs.add((DimensionFieldSpec) fieldSpec);
        break;
      case METRIC:
        // Track each metric name once, preserving insertion order.
        if (!_metricSet.contains(columnName)) {
          _metricSet.add(columnName);
          _metricList.add(columnName);
        }
        _metricFieldSpecs.add((MetricFieldSpec) fieldSpec);
        break;
      case TIME:
        Preconditions.checkState(_timeFieldSpec == null, "Already defined the time column: " + _timeFieldSpec);
        _timeFieldSpec = (TimeFieldSpec) fieldSpec;
        break;
      default:
        throw new UnsupportedOperationException("Unsupported field type: " + fieldType);
    }

    _fieldSpecMap.put(columnName, fieldSpec);
  }
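  // Illustrative JSON accepted by fromFile/fromString/fromInputStream (a minimal sketch derived from the
  // Jackson-mapped setters above; the values and nested property names are assumptions, not a normative format):
  //
  //   {
  //     "schemaName": "myTable",
  //     "dimensionFieldSpecs": [{"name": "country", "dataType": "STRING"}],
  //     "metricFieldSpecs": [{"name": "clicks", "dataType": "LONG"}],
  //     "timeFieldSpec": {"incomingGranularitySpec": {"name": "daysSinceEpoch", "dataType": "INT", "timeType": "DAYS"}}
  //   }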
  // For backward compatibility with ThirdEye.
  @Deprecated
  public void addField(@Nonnull String columnName, @Nonnull FieldSpec fieldSpec) {
    addField(fieldSpec);
  }

  public boolean hasColumn(@Nonnull String columnName) {
    return _fieldSpecMap.containsKey(columnName);
  }

  @JsonIgnore
  @Nonnull
  public Map<String, FieldSpec> getFieldSpecMap() {
    return _fieldSpecMap;
  }

  @JsonIgnore
  @Nonnull
  public Collection<String> getColumnNames() {
    return _fieldSpecMap.keySet();
  }

  @JsonIgnore
  @Nonnull
  public Collection<FieldSpec> getAllFieldSpecs() {
    return _fieldSpecMap.values();
  }

  public int size() {
    return _fieldSpecMap.size();
  }

  @JsonIgnore
  @Nullable
  public FieldSpec getFieldSpecFor(@Nonnull String columnName) {
    return _fieldSpecMap.get(columnName);
  }

  @JsonIgnore
  @Nullable
  public MetricFieldSpec getMetricSpec(@Nonnull String metricName) {
    FieldSpec fieldSpec = _fieldSpecMap.get(metricName);
    if (fieldSpec != null && fieldSpec.getFieldType() == FieldType.METRIC) {
      return (MetricFieldSpec) fieldSpec;
    }
    return null;
  }

  @JsonIgnore
  @Nullable
  public DimensionFieldSpec getDimensionSpec(@Nonnull String dimensionName) {
    FieldSpec fieldSpec = _fieldSpecMap.get(dimensionName);
    if (fieldSpec != null && fieldSpec.getFieldType() == FieldType.DIMENSION) {
      return (DimensionFieldSpec) fieldSpec;
    }
    return null;
  }

  @JsonIgnore
  @Nonnull
  public List<String> getDimensionNames() {
    return _dimensionList;
  }

  @JsonIgnore
  @Nonnull
  public List<String> getMetricNames() {
    return _metricList;
  }

  @JsonIgnore
  @Nullable
  public String getTimeColumnName() {
    return (_timeFieldSpec != null) ? _timeFieldSpec.getName() : null;
  }

  @JsonIgnore
  @Nullable
  public TimeUnit getIncomingTimeUnit() {
    return (_timeFieldSpec != null) ? _timeFieldSpec.getIncomingGranularitySpec().getTimeType() : null;
  }

  @JsonIgnore
  @Nullable
  public TimeUnit getOutgoingTimeUnit() {
    return (_timeFieldSpec != null) ? _timeFieldSpec.getOutgoingGranularitySpec().getTimeType() : null;
  }

  @JsonIgnore
  @Nonnull
  public String getJSONSchema() {
    // Lazily computed and cached; note the cache is not invalidated if fields are added afterwards.
    if (_jsonSchema == null) {
      try {
        _jsonSchema = MAPPER.writeValueAsString(this);
      } catch (IOException e) {
        throw new RuntimeException("Caught exception while writing Schema as JSON format string.", e);
      }
    }
    return _jsonSchema;
  }

  /**
   * Validates a Pinot schema. The following validations are performed:
   * <p>- For dimension and time fields, the supported {@link DataType}s are: INT, LONG, FLOAT, DOUBLE, STRING.
   * <p>- For metric fields (non-derived), the supported {@link DataType}s are: INT, LONG, FLOAT, DOUBLE.
   * <p>- All fields must have a default null value.
   *
   * @param ctxLogger logger used to log the message (if null, the current class logger is used).
   * @return whether the schema is valid.
   */
  public boolean validate(Logger ctxLogger) {
    if (ctxLogger == null) {
      ctxLogger = LOGGER;
    }
    boolean isValid = true;

    // Log ALL the schema errors that may be present.
    for (FieldSpec fieldSpec : _fieldSpecMap.values()) {
      FieldType fieldType = fieldSpec.getFieldType();
      DataType dataType = fieldSpec.getDataType();
      String fieldName = fieldSpec.getName();
      try {
        switch (fieldType) {
          case DIMENSION:
          case TIME:
            switch (dataType) {
              case INT:
              case LONG:
              case FLOAT:
              case DOUBLE:
              case STRING:
                // Check that getDefaultNullValue() does not throw an exception.
                fieldSpec.getDefaultNullValue();
                break;
              default:
                ctxLogger.info("Unsupported data type: {} in DIMENSION/TIME field: {}", dataType, fieldName);
                isValid = false;
                break;
            }
            break;
          case METRIC:
            switch (dataType) {
              case INT:
              case LONG:
              case FLOAT:
              case DOUBLE:
                // Check that getDefaultNullValue() does not throw an exception.
                fieldSpec.getDefaultNullValue();
                break;
              default:
                ctxLogger.info("Unsupported data type: {} in METRIC field: {}", dataType, fieldName);
                isValid = false;
                break;
            }
            break;
          default:
            // Log the field type (not the data type) for unsupported field types.
            ctxLogger.info("Unsupported field type: {} for field: {}", fieldType, fieldName);
            isValid = false;
            break;
        }
      } catch (Exception e) {
        ctxLogger.info("Caught exception while validating field: {} with field type: {}, data type: {}, {}",
            fieldName, fieldType, dataType, e.getMessage());
        isValid = false;
      }
    }
    return isValid;
  }
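  // Example usage (a sketch; the file name is illustrative). validate(LOGGER) is also invoked by
  // SchemaBuilder.build(), so schemas constructed through the builder are checked automatically:
  //
  //   Schema schema = Schema.fromFile(new File("schema.json"));
  //   if (!schema.validate(null)) {  // passing null falls back to this class's logger
  //     throw new RuntimeException("Invalid schema: " + schema.getSchemaName());
  //   }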
  public static class SchemaBuilder {
    private Schema _schema;

    public SchemaBuilder() {
      _schema = new Schema();
    }

    public SchemaBuilder setSchemaName(@Nonnull String schemaName) {
      _schema.setSchemaName(schemaName);
      return this;
    }

    public SchemaBuilder addSingleValueDimension(@Nonnull String dimensionName, @Nonnull DataType dataType) {
      _schema.addField(new DimensionFieldSpec(dimensionName, dataType, true));
      return this;
    }

    public SchemaBuilder addSingleValueDimension(@Nonnull String dimensionName, @Nonnull DataType dataType,
        @Nonnull Object defaultNullValue) {
      _schema.addField(new DimensionFieldSpec(dimensionName, dataType, true, defaultNullValue));
      return this;
    }

    public SchemaBuilder addMultiValueDimension(@Nonnull String dimensionName, @Nonnull DataType dataType) {
      _schema.addField(new DimensionFieldSpec(dimensionName, dataType, false));
      return this;
    }

    public SchemaBuilder addMultiValueDimension(@Nonnull String dimensionName, @Nonnull DataType dataType,
        @Nonnull Object defaultNullValue) {
      _schema.addField(new DimensionFieldSpec(dimensionName, dataType, false, defaultNullValue));
      return this;
    }

    public SchemaBuilder addMetric(@Nonnull String metricName, @Nonnull DataType dataType) {
      _schema.addField(new MetricFieldSpec(metricName, dataType));
      return this;
    }

    public SchemaBuilder addMetric(@Nonnull String metricName, @Nonnull DataType dataType,
        @Nonnull Object defaultNullValue) {
      _schema.addField(new MetricFieldSpec(metricName, dataType, defaultNullValue));
      return this;
    }

    public SchemaBuilder addMetric(@Nonnull String name, @Nonnull DataType dataType, int fieldSize,
        @Nonnull MetricFieldSpec.DerivedMetricType derivedMetricType) {
      _schema.addField(new MetricFieldSpec(name, dataType, fieldSize, derivedMetricType));
      return this;
    }

    public SchemaBuilder addMetric(@Nonnull String name, @Nonnull DataType dataType, int fieldSize,
        @Nonnull MetricFieldSpec.DerivedMetricType derivedMetricType, @Nonnull Object defaultNullValue) {
      _schema.addField(new MetricFieldSpec(name, dataType, fieldSize, derivedMetricType, defaultNullValue));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull String incomingName, @Nonnull TimeUnit incomingTimeUnit,
        @Nonnull DataType incomingDataType) {
      _schema.addField(new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnit));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull String incomingName, @Nonnull TimeUnit incomingTimeUnit,
        @Nonnull DataType incomingDataType, @Nonnull Object defaultNullValue) {
      _schema.addField(new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnit, defaultNullValue));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull String incomingName, @Nonnull TimeUnit incomingTimeUnit,
        @Nonnull DataType incomingDataType, @Nonnull String outgoingName, @Nonnull TimeUnit outgoingTimeUnit,
        @Nonnull DataType outgoingDataType) {
      _schema.addField(new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnit, outgoingName,
          outgoingDataType, outgoingTimeUnit));
      return this;
    }
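    // Illustrative incoming/outgoing time pair (a sketch; column names are assumptions): raw events
    // arrive with millisecond timestamps but are stored at day granularity.
    //
    //   new Schema.SchemaBuilder()
    //       .setSchemaName("events")
    //       .addTime("timeMillis", TimeUnit.MILLISECONDS, DataType.LONG,
    //           "daysSinceEpoch", TimeUnit.DAYS, DataType.INT)
    //       .build();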
    public SchemaBuilder addTime(@Nonnull String incomingName, @Nonnull TimeUnit incomingTimeUnit,
        @Nonnull DataType incomingDataType, @Nonnull String outgoingName, @Nonnull TimeUnit outgoingTimeUnit,
        @Nonnull DataType outgoingDataType, @Nonnull Object defaultNullValue) {
      _schema.addField(new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnit, outgoingName,
          outgoingDataType, outgoingTimeUnit, defaultNullValue));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull String incomingName, int incomingTimeUnitSize,
        @Nonnull TimeUnit incomingTimeUnit, @Nonnull DataType incomingDataType) {
      _schema.addField(new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnitSize, incomingTimeUnit));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull String incomingName, int incomingTimeUnitSize,
        @Nonnull TimeUnit incomingTimeUnit, @Nonnull DataType incomingDataType, @Nonnull Object defaultNullValue) {
      _schema.addField(
          new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnitSize, incomingTimeUnit, defaultNullValue));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull String incomingName, int incomingTimeUnitSize,
        @Nonnull TimeUnit incomingTimeUnit, @Nonnull DataType incomingDataType, @Nonnull String outgoingName,
        int outgoingTimeUnitSize, @Nonnull TimeUnit outgoingTimeUnit, @Nonnull DataType outgoingDataType) {
      _schema.addField(new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnitSize, incomingTimeUnit,
          outgoingName, outgoingDataType, outgoingTimeUnitSize, outgoingTimeUnit));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull String incomingName, int incomingTimeUnitSize,
        @Nonnull TimeUnit incomingTimeUnit, @Nonnull DataType incomingDataType, @Nonnull String outgoingName,
        int outgoingTimeUnitSize, @Nonnull TimeUnit outgoingTimeUnit, @Nonnull DataType outgoingDataType,
        @Nonnull Object defaultNullValue) {
      _schema.addField(new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnitSize, incomingTimeUnit,
          outgoingName, outgoingDataType, outgoingTimeUnitSize, outgoingTimeUnit, defaultNullValue));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull TimeGranularitySpec incomingTimeGranularitySpec) {
      _schema.addField(new TimeFieldSpec(incomingTimeGranularitySpec));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull TimeGranularitySpec incomingTimeGranularitySpec,
        @Nonnull Object defaultNullValue) {
      _schema.addField(new TimeFieldSpec(incomingTimeGranularitySpec, defaultNullValue));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull TimeGranularitySpec incomingTimeGranularitySpec,
        @Nonnull TimeGranularitySpec outgoingTimeGranularitySpec) {
      _schema.addField(new TimeFieldSpec(incomingTimeGranularitySpec, outgoingTimeGranularitySpec));
      return this;
    }

    public SchemaBuilder addTime(@Nonnull TimeGranularitySpec incomingTimeGranularitySpec,
        @Nonnull TimeGranularitySpec outgoingTimeGranularitySpec, @Nonnull Object defaultNullValue) {
      _schema.addField(new TimeFieldSpec(incomingTimeGranularitySpec, outgoingTimeGranularitySpec, defaultNullValue));
      return this;
    }

    public Schema build() {
      if (!_schema.validate(LOGGER)) {
        throw new RuntimeException("Invalid schema");
      }
      return _schema;
    }
  }

  @Override
  public String toString() {
    return getJSONSchema();
  }

  @Override
  public boolean equals(Object object) {
    if (this == object) {
      return true;
    }
    if (object instanceof Schema) {
      Schema that = (Schema) object;
      // Null-safe comparison: _schemaName may not have been set.
      return (_schemaName == null ? that._schemaName == null : _schemaName.equals(that._schemaName))
          && _fieldSpecMap.equals(that._fieldSpecMap);
    }
    return false;
  }

  @Override
  public int hashCode() {
    // Guard against NPE when _schemaName has not been set.
    int schemaNameHashCode = (_schemaName != null) ? _schemaName.hashCode() : 0;
    return EqualityUtils.hashCodeOf(schemaNameHashCode, _fieldSpecMap);
  }
}