/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.common.data; import com.linkedin.pinot.common.utils.DataSchema; import javax.annotation.Nonnull; import org.apache.avro.Schema.Type; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; /** * The <code>FieldSpec</code> class contains all specs related to any field (column) in {@link Schema}. * <p>There are 3 types of <code>FieldSpec</code>: * {@link DimensionFieldSpec}, {@link MetricFieldSpec}, {@link TimeFieldSpec} * <p>Specs stored are as follows: * <p>- <code>Name</code>: name of the field. * <p>- <code>DataType</code>: type of the data stored (e.g. INT, LONG, FLOAT, DOUBLE, STRING). * <p>- <code>IsSingleValueField</code>: single-value or multi-value field. * <p>- <code>DefaultNullValue</code>: when no value found for this field, use this value. Stored in string format. 
*/ public abstract class FieldSpec { private static final Integer DEFAULT_DIM_NULL_VALUE_OF_INT = Integer.MIN_VALUE; private static final Long DEFAULT_DIM_NULL_VALUE_OF_LONG = Long.MIN_VALUE; private static final Float DEFAULT_DIM_NULL_VALUE_OF_FLOAT = Float.NEGATIVE_INFINITY; private static final Double DEFAULT_DIM_NULL_VALUE_OF_DOUBLE = Double.NEGATIVE_INFINITY; private static final String DEFAULT_DIM_NULL_VALUE_OF_STRING = "null"; private static final Integer DEFAULT_METRIC_NULL_VALUE_OF_INT = 0; private static final Long DEFAULT_METRIC_NULL_VALUE_OF_LONG = 0L; private static final Float DEFAULT_METRIC_NULL_VALUE_OF_FLOAT = 0.0F; private static final Double DEFAULT_METRIC_NULL_VALUE_OF_DOUBLE = 0.0D; private static final String DEFAULT_METRIC_NULL_VALUE_OF_STRING = "null"; private String _name; private DataType _dataType; private boolean _isSingleValueField = true; private String _stringDefaultNullValue; private Object _cachedDefaultNullValue; // Default constructor required by JSON de-serializer. DO NOT REMOVE. 
public FieldSpec() { } public FieldSpec(@Nonnull String name, @Nonnull DataType dataType, boolean isSingleValueField) { _name = name; _dataType = dataType.getStoredType(); _isSingleValueField = isSingleValueField; } public FieldSpec(@Nonnull String name, @Nonnull DataType dataType, boolean isSingleValueField, @Nonnull Object defaultNullValue) { _name = name; _dataType = dataType.getStoredType(); _isSingleValueField = isSingleValueField; _stringDefaultNullValue = defaultNullValue.toString(); } @Nonnull public abstract FieldType getFieldType(); @Nonnull public String getName() { return _name; } public void setName(@Nonnull String name) { _name = name; } @Nonnull public DataType getDataType() { return _dataType; } public void setDataType(@Nonnull DataType dataType) { _dataType = dataType.getStoredType(); _cachedDefaultNullValue = null; } public boolean isSingleValueField() { return _isSingleValueField; } public void setSingleValueField(boolean isSingleValueField) { _isSingleValueField = isSingleValueField; } @Nonnull public Object getDefaultNullValue() { FieldType fieldType = getFieldType(); if (_cachedDefaultNullValue == null) { if (_stringDefaultNullValue != null) { switch (_dataType) { case INT: _cachedDefaultNullValue = Integer.valueOf(_stringDefaultNullValue); break; case LONG: _cachedDefaultNullValue = Long.valueOf(_stringDefaultNullValue); break; case FLOAT: _cachedDefaultNullValue = Float.valueOf(_stringDefaultNullValue); break; case DOUBLE: _cachedDefaultNullValue = Double.valueOf(_stringDefaultNullValue); break; case STRING: _cachedDefaultNullValue = _stringDefaultNullValue; break; default: throw new UnsupportedOperationException("Unsupported data type: " + _dataType); } } else { switch (fieldType) { case METRIC: switch (_dataType) { case INT: _cachedDefaultNullValue = DEFAULT_METRIC_NULL_VALUE_OF_INT; break; case LONG: _cachedDefaultNullValue = DEFAULT_METRIC_NULL_VALUE_OF_LONG; break; case FLOAT: _cachedDefaultNullValue = 
DEFAULT_METRIC_NULL_VALUE_OF_FLOAT; break; case DOUBLE: _cachedDefaultNullValue = DEFAULT_METRIC_NULL_VALUE_OF_DOUBLE; break; case STRING: _cachedDefaultNullValue = DEFAULT_METRIC_NULL_VALUE_OF_STRING; break; default: throw new UnsupportedOperationException( "Unknown default null value for metric field of data type: " + _dataType); } break; case DIMENSION: case TIME: switch (_dataType) { case INT: _cachedDefaultNullValue = DEFAULT_DIM_NULL_VALUE_OF_INT; break; case LONG: _cachedDefaultNullValue = DEFAULT_DIM_NULL_VALUE_OF_LONG; break; case FLOAT: _cachedDefaultNullValue = DEFAULT_DIM_NULL_VALUE_OF_FLOAT; break; case DOUBLE: _cachedDefaultNullValue = DEFAULT_DIM_NULL_VALUE_OF_DOUBLE; break; case STRING: _cachedDefaultNullValue = DEFAULT_DIM_NULL_VALUE_OF_STRING; break; default: throw new UnsupportedOperationException( "Unknown default null value for dimension/time field of data type: " + _dataType); } break; default: throw new UnsupportedOperationException("Unsupported field type: " + fieldType); } } } return _cachedDefaultNullValue; } public void setDefaultNullValue(@Nonnull Object defaultNullValue) { _stringDefaultNullValue = defaultNullValue.toString(); _cachedDefaultNullValue = null; } /** * The <code>FieldType</code> enum is used to demonstrate the real world business logic for a column. * <p><code>DIMENSION</code>: columns used to filter records. * <p><code>METRIC</code>: columns used to apply aggregation on. <code>METRIC</code> field only contains numeric data. * <p><code>TIME</code>: time column (at most one per {@link Schema}). <code>TIME</code> field can be used to prune * segments, otherwise treated the same as <code>DIMENSION</code> field. */ public enum FieldType { DIMENSION, METRIC, TIME } /** * The <code>DataType</code> enum is used to demonstrate the data type of a column. * <p>Array <code>DataType</code> is only used in {@link DataSchema}. * <p>In {@link Schema}, use non-array <code>DataType</code> only. 
* <p>In pinot, we store data using 5 <code>DataType</code>s: INT, LONG, FLOAT, DOUBLE, STRING. All other * <code>DataType</code>s will be converted to one of them. */ public enum DataType { BOOLEAN, // Stored as STRING. BYTE, // Stored as INT. CHAR, // Stored as STRING. SHORT, // Stored as INT. INT, LONG, FLOAT, DOUBLE, STRING, OBJECT, // Used in dataTable to transfer data structure. //EVERYTHING AFTER THIS MUST BE ARRAY TYPE BYTE_ARRAY, // Unused. CHAR_ARRAY, // Unused. SHORT_ARRAY, // Unused. INT_ARRAY, LONG_ARRAY, FLOAT_ARRAY, DOUBLE_ARRAY, STRING_ARRAY; public boolean isNumber() { switch (this) { case BYTE: case SHORT: case INT: case LONG: case FLOAT: case DOUBLE: return true; default: return false; } } public boolean isInteger() { switch (this) { case BYTE: case SHORT: case INT: case LONG: return true; default: return false; } } public boolean isSingleValue() { return this.ordinal() < BYTE_ARRAY.ordinal(); } public DataType toMultiValue() { switch (this) { case BYTE: return BYTE_ARRAY; case CHAR: return CHAR_ARRAY; case INT: return INT_ARRAY; case LONG: return LONG_ARRAY; case FLOAT: return FLOAT_ARRAY; case DOUBLE: return DOUBLE_ARRAY; case STRING: return STRING_ARRAY; default: throw new UnsupportedOperationException("Unsupported toMultiValue for data type: " + this); } } public DataType toSingleValue() { switch (this) { case BYTE_ARRAY: return BYTE; case CHAR_ARRAY: return CHAR; case INT_ARRAY: return INT; case LONG_ARRAY: return LONG; case FLOAT_ARRAY: return FLOAT; case DOUBLE_ARRAY: return DOUBLE; case STRING_ARRAY: return STRING; default: throw new UnsupportedOperationException("Unsupported toSingleValue for data type: " + this); } } public boolean isCompatible(DataType anotherDataType) { // Single-value is not compatible with multi-value. if (isSingleValue() != anotherDataType.isSingleValue()) { return false; } // Number is not compatible with String. 
if (isSingleValue()) { if (isNumber() != anotherDataType.isNumber()) { return false; } } else { if (toSingleValue().isNumber() != anotherDataType.toSingleValue().isNumber()) { return false; } } return true; } /** * Return the {@link DataType} stored in pinot. */ public DataType getStoredType() { switch (this) { case BYTE: case SHORT: case INT: return INT; case LONG: return LONG; case FLOAT: return FLOAT; case DOUBLE: return DOUBLE; case BOOLEAN: case CHAR: case STRING: return STRING; default: throw new UnsupportedOperationException("Unsupported data type: " + this); } } /** * Return the {@link DataType} associate with the {@link Type} */ public static DataType valueOf(Type avroType) { switch (avroType) { case INT: return INT; case LONG: return LONG; case FLOAT: return FLOAT; case DOUBLE: return DOUBLE; case BOOLEAN: case STRING: case ENUM: return STRING; default: throw new UnsupportedOperationException("Unsupported Avro type: " + avroType); } } /** * Return number of bytes needed for storage. 
*/ public int size() { switch (this) { case INT: return 4; case LONG: return 8; case FLOAT: return 4; case DOUBLE: return 8; default: throw new UnsupportedOperationException("Cannot get number of bytes for: " + this); } } public JSONObject toJSONSchemaFor(String column) throws JSONException { final JSONObject ret = new JSONObject(); ret.put("name", column); ret.put("doc", "data sample from load generator"); switch (this) { case INT: final JSONArray intType = new JSONArray(); intType.put("null"); intType.put("int"); ret.put("type", intType); return ret; case LONG: final JSONArray longType = new JSONArray(); longType.put("null"); longType.put("long"); ret.put("type", longType); return ret; case FLOAT: final JSONArray floatType = new JSONArray(); floatType.put("null"); floatType.put("float"); ret.put("type", floatType); return ret; case DOUBLE: final JSONArray doubleType = new JSONArray(); doubleType.put("null"); doubleType.put("double"); ret.put("type", doubleType); return ret; case STRING: final JSONArray stringType = new JSONArray(); stringType.put("null"); stringType.put("string"); ret.put("type", stringType); return ret; default: return null; } } } }