/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.hive;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import io.netty.buffer.DrillBuf;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.expr.holders.Decimal18Holder;
import org.apache.drill.exec.expr.holders.Decimal28SparseHolder;
import org.apache.drill.exec.expr.holders.Decimal38SparseHolder;
import org.apache.drill.exec.expr.holders.Decimal9Holder;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.exec.util.DecimalUtility;
import org.apache.drill.exec.vector.NullableBigIntVector;
import org.apache.drill.exec.vector.NullableBitVector;
import org.apache.drill.exec.vector.NullableDateVector;
import org.apache.drill.exec.vector.NullableDecimal18Vector;
import org.apache.drill.exec.vector.NullableDecimal28SparseVector;
import org.apache.drill.exec.vector.NullableDecimal38SparseVector;
import org.apache.drill.exec.vector.NullableDecimal9Vector;
import org.apache.drill.exec.vector.NullableFloat4Vector;
import org.apache.drill.exec.vector.NullableFloat8Vector;
import org.apache.drill.exec.vector.NullableIntVector;
import org.apache.drill.exec.vector.NullableTimeStampVector;
import org.apache.drill.exec.vector.NullableVarBinaryVector;
import org.apache.drill.exec.vector.NullableVarCharVector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.work.ExecErrorConstants;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
public class HiveUtilities {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveUtilities.class);
/**
 * Partition values are received in string format. Converts the given value into an
 * appropriate Java object based on the given type.
 *
 * @param typeInfo Hive type of the partition column (only primitive types are allowed)
 * @param value partition value as a string
 * @param defaultPartitionValue the string Hive uses for the default (null) partition
 * @return the converted value, or null if the value equals the default partition value
 * or cannot be parsed
 */
public static Object convertPartitionType(TypeInfo typeInfo, String value, final String defaultPartitionValue) {
if (typeInfo.getCategory() != Category.PRIMITIVE) {
// In Hive only primitive types are allowed as partition column types.
throw new DrillRuntimeException("Non-primitive types are not allowed as partition column types in Hive, " +
"but received: " + typeInfo.getCategory());
}
if (defaultPartitionValue.equals(value)) {
return null;
}
final PrimitiveCategory pCat = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
try {
switch (pCat) {
case BINARY:
return value.getBytes(StandardCharsets.UTF_8);
case BOOLEAN:
return Boolean.parseBoolean(value);
case DECIMAL: {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
return HiveDecimalUtils.enforcePrecisionScale(HiveDecimal.create(value),
decimalTypeInfo.precision(), decimalTypeInfo.scale());
}
case DOUBLE:
return Double.parseDouble(value);
case FLOAT:
return Float.parseFloat(value);
case BYTE:
case SHORT:
case INT:
return Integer.parseInt(value);
case LONG:
return Long.parseLong(value);
case STRING:
case VARCHAR:
return value.getBytes(StandardCharsets.UTF_8);
case CHAR:
return value.trim().getBytes(StandardCharsets.UTF_8);
case TIMESTAMP:
return Timestamp.valueOf(value);
case DATE:
return Date.valueOf(value);
}
} catch(final Exception e) {
// In Hive, partition values that can't be converted from string are considered to be NULL.
logger.trace("Failed to interpret '{}' value from partition value string '{}'", pCat, value);
return null;
}
throwUnsupportedHiveDataTypeError(pCat.toString());
return null;
}
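/*
 * Example (illustrative sketch, not part of the original class): converting a couple of
 * partition value strings. "__HIVE_DEFAULT_PARTITION__" is the usual Hive default
 * partition name, configurable via hive.exec.default.partition.name.
 *
 *   Object i = HiveUtilities.convertPartitionType(
 *       TypeInfoFactory.intTypeInfo, "42", "__HIVE_DEFAULT_PARTITION__");  // Integer 42
 *   Object n = HiveUtilities.convertPartitionType(
 *       TypeInfoFactory.intTypeInfo, "__HIVE_DEFAULT_PARTITION__",
 *       "__HIVE_DEFAULT_PARTITION__");                                     // null
 */
/**
 * Populates the given value vector with the same value for all positions in
 * [start, end). The runtime type of {@code val} must match the vector's minor type;
 * the DECIMAL28/38 sparse types additionally use the managed buffer as scratch space.
 *
 * @param vector vector to populate (must be one of the nullable types handled below)
 * @param managedBuffer scratch buffer used for DECIMAL28/38 sparse values
 * @param val value to write, e.g. a converted partition value
 * @param start first record index (inclusive)
 * @param end last record index (exclusive)
 */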
public static void populateVector(final ValueVector vector, final DrillBuf managedBuffer, final Object val,
final int start, final int end) {
TypeProtos.MinorType type = vector.getField().getType().getMinorType();
switch(type) {
case VARBINARY: {
NullableVarBinaryVector v = (NullableVarBinaryVector) vector;
byte[] value = (byte[]) val;
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, value, 0, value.length);
}
break;
}
case BIT: {
NullableBitVector v = (NullableBitVector) vector;
Boolean value = (Boolean) val;
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, value ? 1 : 0);
}
break;
}
case FLOAT8: {
NullableFloat8Vector v = (NullableFloat8Vector) vector;
double value = (double) val;
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, value);
}
break;
}
case FLOAT4: {
NullableFloat4Vector v = (NullableFloat4Vector) vector;
float value = (float) val;
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, value);
}
break;
}
case TINYINT:
case SMALLINT:
case INT: {
NullableIntVector v = (NullableIntVector) vector;
int value = (int) val;
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, value);
}
break;
}
case BIGINT: {
NullableBigIntVector v = (NullableBigIntVector) vector;
long value = (long) val;
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, value);
}
break;
}
case VARCHAR: {
NullableVarCharVector v = (NullableVarCharVector) vector;
byte[] value = (byte[]) val;
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, value, 0, value.length);
}
break;
}
case TIMESTAMP: {
NullableTimeStampVector v = (NullableTimeStampVector) vector;
DateTime ts = new DateTime(((Timestamp) val).getTime()).withZoneRetainFields(DateTimeZone.UTC);
long value = ts.getMillis();
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, value);
}
break;
}
case DATE: {
NullableDateVector v = (NullableDateVector) vector;
DateTime date = new DateTime(((Date)val).getTime()).withZoneRetainFields(DateTimeZone.UTC);
long value = date.getMillis();
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, value);
}
break;
}
case DECIMAL9: {
final BigDecimal value = ((HiveDecimal)val).bigDecimalValue();
final NullableDecimal9Vector v = ((NullableDecimal9Vector) vector);
final Decimal9Holder holder = new Decimal9Holder();
holder.scale = v.getField().getScale();
holder.precision = v.getField().getPrecision();
holder.value = DecimalUtility.getDecimal9FromBigDecimal(value, holder.scale, holder.precision);
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, holder);
}
break;
}
case DECIMAL18: {
final BigDecimal value = ((HiveDecimal)val).bigDecimalValue();
final NullableDecimal18Vector v = ((NullableDecimal18Vector) vector);
final Decimal18Holder holder = new Decimal18Holder();
holder.scale = v.getField().getScale();
holder.precision = v.getField().getPrecision();
holder.value = DecimalUtility.getDecimal18FromBigDecimal(value, holder.scale, holder.precision);
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, holder);
}
break;
}
case DECIMAL28SPARSE: {
final int needSpace = Decimal28SparseHolder.nDecimalDigits * DecimalUtility.INTEGER_SIZE;
Preconditions.checkArgument(managedBuffer.capacity() >= needSpace,
"Insufficient space in the given managed buffer. Need %s bytes, buffer has %s bytes",
needSpace, managedBuffer.capacity());
final BigDecimal value = ((HiveDecimal)val).bigDecimalValue();
final NullableDecimal28SparseVector v = ((NullableDecimal28SparseVector) vector);
final Decimal28SparseHolder holder = new Decimal28SparseHolder();
holder.scale = v.getField().getScale();
holder.precision = v.getField().getPrecision();
holder.buffer = managedBuffer;
holder.start = 0;
DecimalUtility.getSparseFromBigDecimal(value, holder.buffer, 0, holder.scale, holder.precision,
Decimal28SparseHolder.nDecimalDigits);
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, holder);
}
break;
}
case DECIMAL38SPARSE: {
final int needSpace = Decimal38SparseHolder.nDecimalDigits * DecimalUtility.INTEGER_SIZE;
Preconditions.checkArgument(managedBuffer.capacity() >= needSpace,
"Insufficient space in the given managed buffer. Need %s bytes, buffer has %s bytes",
needSpace, managedBuffer.capacity());
final BigDecimal value = ((HiveDecimal)val).bigDecimalValue();
final NullableDecimal38SparseVector v = ((NullableDecimal38SparseVector) vector);
final Decimal38SparseHolder holder = new Decimal38SparseHolder();
holder.scale = v.getField().getScale();
holder.precision = v.getField().getPrecision();
holder.buffer = managedBuffer;
holder.start = 0;
DecimalUtility.getSparseFromBigDecimal(value, holder.buffer, 0, holder.scale, holder.precision,
Decimal38SparseHolder.nDecimalDigits);
for (int i = start; i < end; i++) {
v.getMutator().setSafe(i, holder);
}
break;
}
}
}
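/**
 * Maps a Hive {@link TypeInfo} to a Drill {@link MajorType}. All Hive columns map to
 * OPTIONAL (nullable) Drill types; char/varchar length and decimal precision/scale are
 * carried over into the Drill type. Illustrative sketch (assumes an {@link OptionManager}
 * with the decimal data type enabled):
 * <pre>{@code
 * TypeInfo ti = TypeInfoFactory.getDecimalTypeInfo(18, 2);
 * MajorType mt = HiveUtilities.getMajorTypeFromHiveTypeInfo(ti, options);
 * // mt: minor type DECIMAL18, mode OPTIONAL, precision 18, scale 2
 * }</pre>
 *
 * @param typeInfo Hive type info to map
 * @param options session options (consulted for decimal support)
 * @return the corresponding Drill major type; throws for non-primitive categories
 */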
public static MajorType getMajorTypeFromHiveTypeInfo(final TypeInfo typeInfo, final OptionManager options) {
switch (typeInfo.getCategory()) {
case PRIMITIVE: {
PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
MinorType minorType = HiveUtilities.getMinorTypeFromHivePrimitiveTypeInfo(primitiveTypeInfo, options);
MajorType.Builder typeBuilder = MajorType.newBuilder().setMinorType(minorType)
.setMode(DataMode.OPTIONAL); // Hive columns (both regular and partition) could have null values
switch (primitiveTypeInfo.getPrimitiveCategory()) {
case CHAR:
case VARCHAR:
BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) primitiveTypeInfo;
typeBuilder.setPrecision(baseCharTypeInfo.getLength());
break;
case DECIMAL:
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
typeBuilder.setPrecision(decimalTypeInfo.getPrecision()).setScale(decimalTypeInfo.getScale());
break;
default:
// do nothing, other primitive categories do not have precision or scale
}
return typeBuilder.build();
}
case LIST:
case MAP:
case STRUCT:
case UNION:
default:
throwUnsupportedHiveDataTypeError(typeInfo.getCategory().toString());
}
return null;
}
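/**
 * Maps a Hive primitive category to the corresponding Drill {@link TypeProtos.MinorType}.
 * Throws a user error if a decimal type is encountered while decimal support is
 * disabled, or if the category is unsupported.
 */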
public static TypeProtos.MinorType getMinorTypeFromHivePrimitiveTypeInfo(PrimitiveTypeInfo primitiveTypeInfo,
OptionManager options) {
switch(primitiveTypeInfo.getPrimitiveCategory()) {
case BINARY:
return TypeProtos.MinorType.VARBINARY;
case BOOLEAN:
return TypeProtos.MinorType.BIT;
case DECIMAL: {
if (!options.getOption(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY).bool_val) {
throw UserException.unsupportedError()
.message(ExecErrorConstants.DECIMAL_DISABLE_ERR_MSG)
.build(logger);
}
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
return DecimalUtility.getDecimalDataType(decimalTypeInfo.precision());
}
case DOUBLE:
return TypeProtos.MinorType.FLOAT8;
case FLOAT:
return TypeProtos.MinorType.FLOAT4;
// TODO (DRILL-2470)
// Byte and short (tinyint and smallint in SQL types) are currently read as integers
// as these smaller integer types are not fully supported in Drill today.
case SHORT:
case BYTE:
case INT:
return TypeProtos.MinorType.INT;
case LONG:
return TypeProtos.MinorType.BIGINT;
case STRING:
case VARCHAR:
case CHAR:
return TypeProtos.MinorType.VARCHAR;
case TIMESTAMP:
return TypeProtos.MinorType.TIMESTAMP;
case DATE:
return TypeProtos.MinorType.DATE;
}
throwUnsupportedHiveDataTypeError(primitiveTypeInfo.getPrimitiveCategory().toString());
return null;
}
/**
 * Utility method which gets the table or partition {@link InputFormat} class. First it
 * tries to get the class name from the given StorageDescriptor object. If it is not present
 * there, it tries to get it from the StorageHandler class set in the table properties. If
 * neither is present, an exception is thrown.
 * @param job {@link JobConf} instance, needed in case the table is a StorageHandler based table.
 * @param sd {@link StorageDescriptor} instance of the partition currently being read, or of the table (for non-partitioned tables).
 * @param table Table object
 * @return InputFormat class for the given table or partition
 * @throws Exception if no InputFormat class can be determined
 */
public static Class<? extends InputFormat<?, ?>> getInputFormatClass(final JobConf job, final StorageDescriptor sd,
final Table table) throws Exception {
final String inputFormatName = sd.getInputFormat();
if (Strings.isNullOrEmpty(inputFormatName)) {
final String storageHandlerClass = table.getParameters().get(META_TABLE_STORAGE);
if (Strings.isNullOrEmpty(storageHandlerClass)) {
throw new ExecutionSetupException("Unable to get Hive table InputFormat class. There is neither " +
"InputFormat class explicitly specified nor StorageHandler class");
}
final HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(job, storageHandlerClass);
return (Class<? extends InputFormat<?, ?>>) storageHandler.getInputFormatClass();
} else {
return (Class<? extends InputFormat<?, ?>>) Class.forName(inputFormatName);
}
}
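/*
 * Example usage (illustrative sketch; assumes a resolved Hive Table "table" and a JobConf
 * built from the plugin's HiveConf):
 *
 *   final Class<? extends InputFormat<?, ?>> inputFormatClass =
 *       HiveUtilities.getInputFormatClass(job, table.getSd(), table);
 *   job.setInputFormat(inputFormatClass);
 */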
/**
 * Utility method which adds the given config properties to the {@link JobConf} object.
 *
 * @param job {@link JobConf} instance.
 * @param properties New config properties
 */
public static void addConfToJob(final JobConf job, final Properties properties) {
for (Object obj : properties.keySet()) {
job.set((String) obj, (String) properties.get(obj));
}
}
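/*
 * Example (illustrative sketch): copy table metadata properties into the job before
 * creating a record reader.
 *
 *   final Properties tableProps = HiveUtilities.getTableMetadata(table);
 *   HiveUtilities.addConfToJob(job, tableProps);
 */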
/**
* Wrapper around {@link MetaStoreUtils#getPartitionMetadata(Partition, Table)} which also adds parameters from table
* to properties returned by {@link MetaStoreUtils#getPartitionMetadata(Partition, Table)}.
*
* @param partition the source of partition level parameters
* @param table the source of table level parameters
* @return properties
*/
public static Properties getPartitionMetadata(final HivePartition partition, final HiveTableWithColumnCache table) {
final Properties properties;
restoreColumns(table, partition);
properties = MetaStoreUtils.getPartitionMetadata(partition, table);
// SerDe expects properties from the Table, but the above call doesn't add Table properties.
// Include Table properties in the final list so as not to break SerDes that depend on
// Table properties. For example, AvroSerDe gets the schema from properties (passed as second argument).
for (Map.Entry<String, String> entry : table.getParameters().entrySet()) {
if (entry.getKey() != null && entry.getValue() != null) {
properties.put(entry.getKey(), entry.getValue());
}
}
return properties;
}
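/*
 * Example (illustrative sketch; "partition" and "table" come from Drill's Hive metadata
 * cache):
 *
 *   final Properties props = HiveUtilities.getPartitionMetadata(partition, table);
 *   // props contains partition-level properties overlaid with table parameters, e.g.
 *   // the Avro schema that AvroSerDe reads from the table properties.
 */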
/**
 * Sets columns from the table cache on the table and partition.
 *
 * @param table the table whose column list cache is the source of the column lists
 * @param partition the partition whose column list will be restored (may be null)
 */
public static void restoreColumns(HiveTableWithColumnCache table, HivePartition partition) {
// Identical column lists for the table and its partitions are stored only once
// in the cache to reduce the size of the serialized physical plan.
if (partition != null && partition.getSd().getCols() == null) {
partition.getSd().setCols(table.getColumnListsCache().getColumns(partition.getColumnListIndex()));
}
if (table.getSd().getCols() == null) {
table.getSd().setCols(table.getColumnListsCache().getColumns(0));
}
}
/**
 * Wrapper around {@link MetaStoreUtils#getSchema(StorageDescriptor, StorageDescriptor, Map, String, String, List)}
 * which first restores the columns from the table cache onto the table and then returns the
 * properties produced by {@link MetaStoreUtils#getSchema(StorageDescriptor, StorageDescriptor, Map, String, String, List)}.
 */
public static Properties getTableMetadata(HiveTableWithColumnCache table) {
restoreColumns(table, null);
return MetaStoreUtils.getSchema(table.getSd(), table.getSd(), table.getParameters(),
table.getDbName(), table.getTableName(), table.getPartitionKeys());
}
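/**
 * Throws a {@link UserException} listing the Hive data types Drill supports for querying.
 *
 * @param unsupportedType name of the unsupported Hive type or category
 */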
public static void throwUnsupportedHiveDataTypeError(String unsupportedType) {
final StringBuilder errMsg = new StringBuilder();
errMsg.append(String.format("Unsupported Hive data type %s. ", unsupportedType));
errMsg.append(System.lineSeparator());
errMsg.append("The following Hive data types are supported in Drill for querying: ");
errMsg.append(
"BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, TIMESTAMP, BINARY, DECIMAL, STRING, VARCHAR and CHAR");
throw UserException.unsupportedError()
.message(errMsg.toString())
.build(logger);
}
}