/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.hadoop.hive.metastore.hbase; import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Function; import org.apache.hadoop.hive.metastore.api.FunctionType; import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; import org.apache.hadoop.hive.metastore.api.ResourceType; import org.apache.hadoop.hive.metastore.api.ResourceUri; import org.apache.hadoop.hive.metastore.api.Role; import org.apache.hadoop.hive.metastore.api.SQLForeignKey; import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import 
org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDeWithEndPrefix; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.BloomFilter; import org.apache.hive.common.util.HiveStringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.protobuf.ByteString; import com.google.protobuf.InvalidProtocolBufferException; /** * Utility functions */ public class HBaseUtils { final static Charset ENCODING = StandardCharsets.UTF_8; final static char KEY_SEPARATOR = '\u0001'; final static String KEY_SEPARATOR_STR = new String(new char[] {KEY_SEPARATOR}); static final private Logger LOG = LoggerFactory.getLogger(HBaseUtils.class.getName()); /** * Build a key for an object in hbase * @param components * @return */ static byte[] buildKey(String... components) { return buildKey(false, components); } static byte[] buildKeyWithTrailingSeparator(String... components) { return buildKey(true, components); } private static byte[] buildKey(boolean trailingSeparator, String... components) { String protoKey = StringUtils.join(components, KEY_SEPARATOR); if (trailingSeparator) protoKey += KEY_SEPARATOR; return protoKey.getBytes(ENCODING); } private static HbaseMetastoreProto.Parameters buildParameters(Map<String, String> params) { List<HbaseMetastoreProto.ParameterEntry> entries = new ArrayList<>(); for (Map.Entry<String, String> e : params.entrySet()) { entries.add( HbaseMetastoreProto.ParameterEntry.newBuilder() .setKey(e.getKey()) .setValue(e.getValue()) .build()); } return HbaseMetastoreProto.Parameters.newBuilder() .addAllParameter(entries) .build(); } private static Map<String, String> buildParameters(HbaseMetastoreProto.Parameters protoParams) { Map<String, String> params = new HashMap<>(); for (HbaseMetastoreProto.ParameterEntry pe : protoParams.getParameterList()) { params.put(pe.getKey(), pe.getValue()); } return params; } private static List<HbaseMetastoreProto.PrincipalPrivilegeSetEntry> buildPrincipalPrivilegeSetEntry(Map<String, List<PrivilegeGrantInfo>> entries) { List<HbaseMetastoreProto.PrincipalPrivilegeSetEntry> results = new ArrayList<>(); for (Map.Entry<String, List<PrivilegeGrantInfo>> entry : entries.entrySet()) { results.add(HbaseMetastoreProto.PrincipalPrivilegeSetEntry.newBuilder() .setPrincipalName(entry.getKey()) .addAllPrivileges(buildPrivilegeGrantInfo(entry.getValue())) .build()); } return results; } private static List<HbaseMetastoreProto.PrivilegeGrantInfo> buildPrivilegeGrantInfo( List<PrivilegeGrantInfo> privileges) { List<HbaseMetastoreProto.PrivilegeGrantInfo> results = new ArrayList<>(); for (PrivilegeGrantInfo privilege : privileges) { HbaseMetastoreProto.PrivilegeGrantInfo.Builder builder = HbaseMetastoreProto.PrivilegeGrantInfo.newBuilder(); if (privilege.getPrivilege() != null) builder.setPrivilege(privilege.getPrivilege()); builder.setCreateTime(privilege.getCreateTime()); if (privilege.getGrantor() != null) 
builder.setGrantor(privilege.getGrantor()); if (privilege.getGrantorType() != null) { builder.setGrantorType(convertPrincipalTypes(privilege.getGrantorType())); } builder.setGrantOption(privilege.isGrantOption()); results.add(builder.build()); } return results; } /** * Convert Thrift.PrincipalType to HbaseMetastoreProto.principalType * @param type * @return */ static HbaseMetastoreProto.PrincipalType convertPrincipalTypes(PrincipalType type) { switch (type) { case USER: return HbaseMetastoreProto.PrincipalType.USER; case ROLE: return HbaseMetastoreProto.PrincipalType.ROLE; default: throw new RuntimeException("Unknown principal type " + type.toString()); } } /** * Convert principalType from HbaseMetastoreProto to Thrift.PrincipalType * @param type * @return */ static PrincipalType convertPrincipalTypes(HbaseMetastoreProto.PrincipalType type) { switch (type) { case USER: return PrincipalType.USER; case ROLE: return PrincipalType.ROLE; default: throw new RuntimeException("Unknown principal type " + type.toString()); } } private static Map<String, List<PrivilegeGrantInfo>> convertPrincipalPrivilegeSetEntries( List<HbaseMetastoreProto.PrincipalPrivilegeSetEntry> entries) { Map<String, List<PrivilegeGrantInfo>> map = new HashMap<>(); for (HbaseMetastoreProto.PrincipalPrivilegeSetEntry entry : entries) { map.put(entry.getPrincipalName(), convertPrivilegeGrantInfos(entry.getPrivilegesList())); } return map; } private static List<PrivilegeGrantInfo> convertPrivilegeGrantInfos( List<HbaseMetastoreProto.PrivilegeGrantInfo> privileges) { List<PrivilegeGrantInfo> results = new ArrayList<>(); for (HbaseMetastoreProto.PrivilegeGrantInfo proto : privileges) { PrivilegeGrantInfo pgi = new PrivilegeGrantInfo(); if (proto.hasPrivilege()) pgi.setPrivilege(proto.getPrivilege()); pgi.setCreateTime((int)proto.getCreateTime()); if (proto.hasGrantor()) pgi.setGrantor(proto.getGrantor()); if (proto.hasGrantorType()) { pgi.setGrantorType(convertPrincipalTypes(proto.getGrantorType())); } if (proto.hasGrantOption()) pgi.setGrantOption(proto.getGrantOption()); results.add(pgi); } return results; } private static HbaseMetastoreProto.PrincipalPrivilegeSet buildPrincipalPrivilegeSet(PrincipalPrivilegeSet pps) { HbaseMetastoreProto.PrincipalPrivilegeSet.Builder builder = HbaseMetastoreProto.PrincipalPrivilegeSet.newBuilder(); if (pps.getUserPrivileges() != null) { builder.addAllUsers(buildPrincipalPrivilegeSetEntry(pps.getUserPrivileges())); } if (pps.getRolePrivileges() != null) { builder.addAllRoles(buildPrincipalPrivilegeSetEntry(pps.getRolePrivileges())); } return builder.build(); } private static PrincipalPrivilegeSet buildPrincipalPrivilegeSet( HbaseMetastoreProto.PrincipalPrivilegeSet proto) throws InvalidProtocolBufferException { PrincipalPrivilegeSet pps = null; if (!proto.getUsersList().isEmpty() || !proto.getRolesList().isEmpty()) { pps = new PrincipalPrivilegeSet(); if (!proto.getUsersList().isEmpty()) { pps.setUserPrivileges(convertPrincipalPrivilegeSetEntries(proto.getUsersList())); } if (!proto.getRolesList().isEmpty()) { pps.setRolePrivileges(convertPrincipalPrivilegeSetEntries(proto.getRolesList())); } } return pps; } /** * Serialize a PrincipalPrivilegeSet * @param pps * @return */ static byte[] serializePrincipalPrivilegeSet(PrincipalPrivilegeSet pps) { return buildPrincipalPrivilegeSet(pps).toByteArray(); } /** * Deserialize a PrincipalPrivilegeSet * @param serialized * @return * @throws InvalidProtocolBufferException */ static PrincipalPrivilegeSet deserializePrincipalPrivilegeSet(byte[] serialized) 
      throws InvalidProtocolBufferException {
    HbaseMetastoreProto.PrincipalPrivilegeSet proto =
        HbaseMetastoreProto.PrincipalPrivilegeSet.parseFrom(serialized);
    return buildPrincipalPrivilegeSet(proto);
  }

  /**
   * Serialize a role
   * @param role
   * @return two byte arrays, first contains the key, the second the serialized value.
   */
  static byte[][] serializeRole(Role role) {
    byte[][] result = new byte[2][];
    result[0] = buildKey(role.getRoleName());
    HbaseMetastoreProto.Role.Builder builder = HbaseMetastoreProto.Role.newBuilder();
    builder.setCreateTime(role.getCreateTime());
    if (role.getOwnerName() != null) builder.setOwnerName(role.getOwnerName());
    result[1] = builder.build().toByteArray();
    return result;
  }

  /**
   * Deserialize a role. This method should be used when the rolename is already known as it
   * doesn't have to re-deserialize it.
   * @param roleName name of the role
   * @param value value fetched from hbase
   * @return A role
   * @throws InvalidProtocolBufferException
   */
  static Role deserializeRole(String roleName, byte[] value)
      throws InvalidProtocolBufferException {
    Role role = new Role();
    role.setRoleName(roleName);
    HbaseMetastoreProto.Role protoRole = HbaseMetastoreProto.Role.parseFrom(value);
    role.setCreateTime((int)protoRole.getCreateTime());
    if (protoRole.hasOwnerName()) role.setOwnerName(protoRole.getOwnerName());
    return role;
  }

  /**
   * Deserialize a role. This method should be used when the rolename is not already known (eg
   * when doing a scan).
   * @param key key from hbase
   * @param value value from hbase
   * @return a role
   * @throws InvalidProtocolBufferException
   */
  static Role deserializeRole(byte[] key, byte[] value) throws InvalidProtocolBufferException {
    String roleName = new String(key, ENCODING);
    return deserializeRole(roleName, value);
  }

  /**
   * Serialize a list of role names
   * @param roles
   * @return
   */
  static byte[] serializeRoleList(List<String> roles) {
    return HbaseMetastoreProto.RoleList.newBuilder()
        .addAllRole(roles)
        .build()
        .toByteArray();
  }

  static List<String> deserializeRoleList(byte[] value) throws InvalidProtocolBufferException {
    HbaseMetastoreProto.RoleList proto = HbaseMetastoreProto.RoleList.parseFrom(value);
    return new ArrayList<>(proto.getRoleList());
  }

  /**
   * Serialize a database
   * @param db
   * @return two byte arrays, first contains the key, the second the serialized value.
   */
  static byte[][] serializeDatabase(Database db) {
    byte[][] result = new byte[2][];
    result[0] = buildKey(HiveStringUtils.normalizeIdentifier(db.getName()));
    HbaseMetastoreProto.Database.Builder builder = HbaseMetastoreProto.Database.newBuilder();

    if (db.getDescription() != null) builder.setDescription(db.getDescription());
    if (db.getLocationUri() != null) builder.setUri(db.getLocationUri());
    if (db.getParameters() != null) builder.setParameters(buildParameters(db.getParameters()));
    if (db.getPrivileges() != null) {
      builder.setPrivileges(buildPrincipalPrivilegeSet(db.getPrivileges()));
    }
    if (db.getOwnerName() != null) builder.setOwnerName(db.getOwnerName());
    if (db.getOwnerType() != null) builder.setOwnerType(convertPrincipalTypes(db.getOwnerType()));
    result[1] = builder.build().toByteArray();
    return result;
  }

  /**
   * Deserialize a database. This method should be used when the db name is already known as it
   * doesn't have to re-deserialize it.
   * @param dbName name of the database
   * @param value value fetched from hbase
   * @return A database
   * @throws InvalidProtocolBufferException
   */
  static Database deserializeDatabase(String dbName, byte[] value)
      throws InvalidProtocolBufferException {
    Database db = new Database();
    db.setName(dbName);
    HbaseMetastoreProto.Database protoDb = HbaseMetastoreProto.Database.parseFrom(value);
    if (protoDb.hasDescription()) db.setDescription(protoDb.getDescription());
    if (protoDb.hasUri()) db.setLocationUri(protoDb.getUri());
    if (protoDb.hasParameters()) db.setParameters(buildParameters(protoDb.getParameters()));
    if (protoDb.hasPrivileges()) {
      db.setPrivileges(buildPrincipalPrivilegeSet(protoDb.getPrivileges()));
    }
    if (protoDb.hasOwnerName()) db.setOwnerName(protoDb.getOwnerName());
    if (protoDb.hasOwnerType()) db.setOwnerType(convertPrincipalTypes(protoDb.getOwnerType()));
    return db;
  }

  /**
   * Deserialize a database. This method should be used when the db name is not already known (eg
   * when doing a scan).
   * @param key key from hbase
   * @param value value from hbase
   * @return a database
   * @throws InvalidProtocolBufferException
   */
  static Database deserializeDatabase(byte[] key, byte[] value)
      throws InvalidProtocolBufferException {
    String dbName = new String(key, ENCODING);
    return deserializeDatabase(dbName, value);
  }

  /**
   * Serialize a function
   * @param func function to serialize
   * @return two byte arrays, first contains the key, the second the value.
   */
  static byte[][] serializeFunction(Function func) {
    byte[][] result = new byte[2][];
    result[0] = buildKey(func.getDbName(), func.getFunctionName());
    HbaseMetastoreProto.Function.Builder builder = HbaseMetastoreProto.Function.newBuilder();
    if (func.getClassName() != null) builder.setClassName(func.getClassName());
    if (func.getOwnerName() != null) builder.setOwnerName(func.getOwnerName());
    if (func.getOwnerType() != null) {
      builder.setOwnerType(convertPrincipalTypes(func.getOwnerType()));
    }
    builder.setCreateTime(func.getCreateTime());
    if (func.getFunctionType() != null) {
      builder.setFunctionType(convertFunctionTypes(func.getFunctionType()));
    }
    if (func.getResourceUris() != null) {
      for (ResourceUri uri : func.getResourceUris()) {
        builder.addResourceUris(HbaseMetastoreProto.Function.ResourceUri.newBuilder()
            .setResourceType(convertResourceTypes(uri.getResourceType()))
            .setUri(uri.getUri()));
      }
    }
    result[1] = builder.build().toByteArray();
    return result;
  }

  /**
   * Deserialize a function. This method should be used when the function and db name are
   * already known.
* @param dbName name of the database the function is in * @param functionName name of the function * @param value serialized value of the function * @return function as an object * @throws InvalidProtocolBufferException */ static Function deserializeFunction(String dbName, String functionName, byte[] value) throws InvalidProtocolBufferException { Function func = new Function(); func.setDbName(dbName); func.setFunctionName(functionName); HbaseMetastoreProto.Function protoFunc = HbaseMetastoreProto.Function.parseFrom(value); if (protoFunc.hasClassName()) func.setClassName(protoFunc.getClassName()); if (protoFunc.hasOwnerName()) func.setOwnerName(protoFunc.getOwnerName()); if (protoFunc.hasOwnerType()) { func.setOwnerType(convertPrincipalTypes(protoFunc.getOwnerType())); } func.setCreateTime((int)protoFunc.getCreateTime()); if (protoFunc.hasFunctionType()) { func.setFunctionType(convertFunctionTypes(protoFunc.getFunctionType())); } for (HbaseMetastoreProto.Function.ResourceUri protoUri : protoFunc.getResourceUrisList()) { func.addToResourceUris(new ResourceUri(convertResourceTypes(protoUri.getResourceType()), protoUri.getUri())); } return func; } /** * Deserialize a function. This method should be used when the dbname and function name are * not already known, such as in a scan. * @param key key from hbase * @param value value from hbase * @return function object * @throws InvalidProtocolBufferException */ static Function deserializeFunction(byte[] key, byte[] value) throws InvalidProtocolBufferException { String[] keys = deserializeKey(key); return deserializeFunction(keys[0], keys[1], value); } private static HbaseMetastoreProto.Function.FunctionType convertFunctionTypes(FunctionType type) { switch (type) { case JAVA: return HbaseMetastoreProto.Function.FunctionType.JAVA; default: throw new RuntimeException("Unknown function type " + type.toString()); } } private static FunctionType convertFunctionTypes(HbaseMetastoreProto.Function.FunctionType type) { switch (type) { case JAVA: return FunctionType.JAVA; default: throw new RuntimeException("Unknown function type " + type.toString()); } } private static HbaseMetastoreProto.Function.ResourceUri.ResourceType convertResourceTypes(ResourceType type) { switch (type) { case JAR: return HbaseMetastoreProto.Function.ResourceUri.ResourceType.JAR; case FILE: return HbaseMetastoreProto.Function.ResourceUri.ResourceType.FILE; case ARCHIVE: return HbaseMetastoreProto.Function.ResourceUri.ResourceType.ARCHIVE; default: throw new RuntimeException("Unknown resource type " + type.toString()); } } private static ResourceType convertResourceTypes( HbaseMetastoreProto.Function.ResourceUri.ResourceType type) { switch (type) { case JAR: return ResourceType.JAR; case FILE: return ResourceType.FILE; case ARCHIVE: return ResourceType.ARCHIVE; default: throw new RuntimeException("Unknown resource type " + type.toString()); } } private static List<FieldSchema> convertFieldSchemaListFromProto(List<HbaseMetastoreProto.FieldSchema> protoList) { List<FieldSchema> schemas = new ArrayList<>(protoList.size()); for (HbaseMetastoreProto.FieldSchema proto : protoList) { schemas.add(new FieldSchema(proto.getName(), proto.getType(), proto.hasComment() ? 
proto.getComment() : null)); } return schemas; } private static List<HbaseMetastoreProto.FieldSchema> convertFieldSchemaListToProto(List<FieldSchema> schemas) { List<HbaseMetastoreProto.FieldSchema> protoList = new ArrayList<>(schemas.size()); for (FieldSchema fs : schemas) { HbaseMetastoreProto.FieldSchema.Builder builder = HbaseMetastoreProto.FieldSchema.newBuilder(); builder .setName(fs.getName()) .setType(fs.getType()); if (fs.getComment() != null) builder.setComment(fs.getComment()); protoList.add(builder.build()); } return protoList; } /** * Serialize a storage descriptor. * @param sd storage descriptor to serialize * @return serialized storage descriptor. */ static byte[] serializeStorageDescriptor(StorageDescriptor sd) { HbaseMetastoreProto.StorageDescriptor.Builder builder = HbaseMetastoreProto.StorageDescriptor.newBuilder(); builder.addAllCols(convertFieldSchemaListToProto(sd.getCols())); if (sd.getInputFormat() != null) { builder.setInputFormat(sd.getInputFormat()); } if (sd.getOutputFormat() != null) { builder.setOutputFormat(sd.getOutputFormat()); } builder.setIsCompressed(sd.isCompressed()); builder.setNumBuckets(sd.getNumBuckets()); if (sd.getSerdeInfo() != null) { HbaseMetastoreProto.StorageDescriptor.SerDeInfo.Builder serdeBuilder = HbaseMetastoreProto.StorageDescriptor.SerDeInfo.newBuilder(); SerDeInfo serde = sd.getSerdeInfo(); if (serde.getName() != null) { serdeBuilder.setName(serde.getName()); } if (serde.getSerializationLib() != null) { serdeBuilder.setSerializationLib(serde.getSerializationLib()); } if (serde.getParameters() != null) { serdeBuilder.setParameters(buildParameters(serde.getParameters())); } builder.setSerdeInfo(serdeBuilder); } if (sd.getBucketCols() != null) { builder.addAllBucketCols(sd.getBucketCols()); } if (sd.getSortCols() != null) { List<Order> orders = sd.getSortCols(); List<HbaseMetastoreProto.StorageDescriptor.Order> protoList = new ArrayList<>(orders.size()); for (Order order : orders) { protoList.add(HbaseMetastoreProto.StorageDescriptor.Order.newBuilder() .setColumnName(order.getCol()) .setOrder(order.getOrder()) .build()); } builder.addAllSortCols(protoList); } if (sd.getSkewedInfo() != null) { HbaseMetastoreProto.StorageDescriptor.SkewedInfo.Builder skewBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.newBuilder(); SkewedInfo skewed = sd.getSkewedInfo(); if (skewed.getSkewedColNames() != null) { skewBuilder.addAllSkewedColNames(skewed.getSkewedColNames()); } if (skewed.getSkewedColValues() != null) { for (List<String> innerList : skewed.getSkewedColValues()) { HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList.Builder listBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList.newBuilder(); listBuilder.addAllSkewedColValue(innerList); skewBuilder.addSkewedColValues(listBuilder); } } if (skewed.getSkewedColValueLocationMaps() != null) { for (Map.Entry<List<String>, String> e : skewed.getSkewedColValueLocationMaps().entrySet()) { HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap.Builder mapBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap.newBuilder(); mapBuilder.addAllKey(e.getKey()); mapBuilder.setValue(e.getValue()); skewBuilder.addSkewedColValueLocationMaps(mapBuilder); } } builder.setSkewedInfo(skewBuilder); } builder.setStoredAsSubDirectories(sd.isStoredAsSubDirectories()); return builder.build().toByteArray(); } /** * Produce a hash for the storage descriptor * @param sd storage descriptor to hash * @param md message 
descriptor to use to generate the hash * @return the hash as a byte array */ public static byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) { // Note all maps and lists have to be absolutely sorted. Otherwise we'll produce different // results for hashes based on the OS or JVM being used. md.reset(); for (FieldSchema fs : sd.getCols()) { md.update(fs.getName().getBytes(ENCODING)); md.update(fs.getType().getBytes(ENCODING)); if (fs.getComment() != null) md.update(fs.getComment().getBytes(ENCODING)); } if (sd.getInputFormat() != null) { md.update(sd.getInputFormat().getBytes(ENCODING)); } if (sd.getOutputFormat() != null) { md.update(sd.getOutputFormat().getBytes(ENCODING)); } md.update(sd.isCompressed() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING)); md.update(Integer.toString(sd.getNumBuckets()).getBytes(ENCODING)); if (sd.getSerdeInfo() != null) { SerDeInfo serde = sd.getSerdeInfo(); if (serde.getName() != null) { md.update(serde.getName().getBytes(ENCODING)); } if (serde.getSerializationLib() != null) { md.update(serde.getSerializationLib().getBytes(ENCODING)); } if (serde.getParameters() != null) { SortedMap<String, String> params = new TreeMap<>(serde.getParameters()); for (Map.Entry<String, String> param : params.entrySet()) { md.update(param.getKey().getBytes(ENCODING)); md.update(param.getValue().getBytes(ENCODING)); } } } if (sd.getBucketCols() != null) { List<String> bucketCols = new ArrayList<>(sd.getBucketCols()); for (String bucket : bucketCols) md.update(bucket.getBytes(ENCODING)); } if (sd.getSortCols() != null) { SortedSet<Order> orders = new TreeSet<>(sd.getSortCols()); for (Order order : orders) { md.update(order.getCol().getBytes(ENCODING)); md.update(Integer.toString(order.getOrder()).getBytes(ENCODING)); } } if (sd.getSkewedInfo() != null) { SkewedInfo skewed = sd.getSkewedInfo(); if (skewed.getSkewedColNames() != null) { SortedSet<String> colnames = new TreeSet<>(skewed.getSkewedColNames()); for (String colname : colnames) md.update(colname.getBytes(ENCODING)); } if (skewed.getSkewedColValues() != null) { SortedSet<String> sortedOuterList = new TreeSet<>(); for (List<String> innerList : skewed.getSkewedColValues()) { SortedSet<String> sortedInnerList = new TreeSet<>(innerList); sortedOuterList.add(StringUtils.join(sortedInnerList, ".")); } for (String colval : sortedOuterList) md.update(colval.getBytes(ENCODING)); } if (skewed.getSkewedColValueLocationMaps() != null) { SortedMap<String, String> sortedMap = new TreeMap<>(); for (Map.Entry<List<String>, String> smap : skewed.getSkewedColValueLocationMaps().entrySet()) { SortedSet<String> sortedKey = new TreeSet<>(smap.getKey()); sortedMap.put(StringUtils.join(sortedKey, "."), smap.getValue()); } for (Map.Entry<String, String> e : sortedMap.entrySet()) { md.update(e.getKey().getBytes(ENCODING)); md.update(e.getValue().getBytes(ENCODING)); } } md.update(sd.isStoredAsSubDirectories() ? 
"true".getBytes(ENCODING) : "false".getBytes(ENCODING)); } return md.digest(); } static StorageDescriptor deserializeStorageDescriptor(byte[] serialized) throws InvalidProtocolBufferException { HbaseMetastoreProto.StorageDescriptor proto = HbaseMetastoreProto.StorageDescriptor.parseFrom(serialized); StorageDescriptor sd = new StorageDescriptor(); sd.setCols(convertFieldSchemaListFromProto(proto.getColsList())); if (proto.hasInputFormat()) sd.setInputFormat(proto.getInputFormat()); if (proto.hasOutputFormat()) sd.setOutputFormat(proto.getOutputFormat()); sd.setCompressed(proto.getIsCompressed()); sd.setNumBuckets(proto.getNumBuckets()); if (proto.hasSerdeInfo()) { SerDeInfo serde = new SerDeInfo(); serde.setName(proto.getSerdeInfo().hasName()? proto.getSerdeInfo().getName():null); serde.setSerializationLib(proto.getSerdeInfo().hasSerializationLib()? proto.getSerdeInfo().getSerializationLib():null); serde.setParameters(buildParameters(proto.getSerdeInfo().getParameters())); sd.setSerdeInfo(serde); } sd.setBucketCols(new ArrayList<>(proto.getBucketColsList())); List<Order> sortCols = new ArrayList<>(); for (HbaseMetastoreProto.StorageDescriptor.Order protoOrder : proto.getSortColsList()) { sortCols.add(new Order(protoOrder.getColumnName(), protoOrder.getOrder())); } sd.setSortCols(sortCols); if (proto.hasSkewedInfo()) { SkewedInfo skewed = new SkewedInfo(); skewed .setSkewedColNames(new ArrayList<>(proto.getSkewedInfo().getSkewedColNamesList())); for (HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList innerList : proto.getSkewedInfo().getSkewedColValuesList()) { skewed.addToSkewedColValues(new ArrayList<>(innerList.getSkewedColValueList())); } Map<List<String>, String> colMaps = new HashMap<>(); for (HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap map : proto.getSkewedInfo().getSkewedColValueLocationMapsList()) { colMaps.put(new ArrayList<>(map.getKeyList()), map.getValue()); } skewed.setSkewedColValueLocationMaps(colMaps); sd.setSkewedInfo(skewed); } if (proto.hasStoredAsSubDirectories()) { sd.setStoredAsSubDirectories(proto.getStoredAsSubDirectories()); } return sd; } static List<String> getPartitionKeyTypes(List<FieldSchema> parts) { com.google.common.base.Function<FieldSchema, String> fieldSchemaToType = new com.google.common.base.Function<FieldSchema, String>() { public String apply(FieldSchema fs) { return fs.getType(); } }; return Lists.transform(parts, fieldSchemaToType); } static List<String> getPartitionNames(List<FieldSchema> parts) { com.google.common.base.Function<FieldSchema, String> fieldSchemaToName = new com.google.common.base.Function<FieldSchema, String>() { public String apply(FieldSchema fs) { return fs.getName(); } }; return Lists.transform(parts, fieldSchemaToName); } /** * Serialize a partition * @param part partition object * @param sdHash hash that is being used as a key for the enclosed storage descriptor * @return First element is the key, second is the serialized partition */ static byte[][] serializePartition(Partition part, List<String> partTypes, byte[] sdHash) { byte[][] result = new byte[2][]; result[0] = buildPartitionKey(part.getDbName(), part.getTableName(), partTypes, part.getValues()); HbaseMetastoreProto.Partition.Builder builder = HbaseMetastoreProto.Partition.newBuilder(); builder .setCreateTime(part.getCreateTime()) .setLastAccessTime(part.getLastAccessTime()); if (part.getSd().getLocation() != null) builder.setLocation(part.getSd().getLocation()); if (part.getSd().getParameters() != null) { 
      builder.setSdParameters(buildParameters(part.getSd().getParameters()));
    }
    builder.setSdHash(ByteString.copyFrom(sdHash));
    if (part.getParameters() != null) builder.setParameters(buildParameters(part.getParameters()));
    result[1] = builder.build().toByteArray();
    return result;
  }

  static byte[] buildPartitionKey(String dbName, String tableName, List<String> partTypes,
      List<String> partVals) {
    return buildPartitionKey(dbName, tableName, partTypes, partVals, false);
  }

  static byte[] buildPartitionKey(String dbName, String tableName, List<String> partTypes,
      List<String> partVals, boolean endPrefix) {
    Object[] components = new Object[partVals.size()];
    for (int i = 0; i < partVals.size(); i++) {
      TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(partTypes.get(i));
      ObjectInspector outputOI =
          TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType);
      Converter converter = ObjectInspectorConverters.getConverter(
          PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
      components[i] = converter.convert(partVals.get(i));
    }
    return buildSerializedPartitionKey(dbName, tableName, partTypes, components, endPrefix);
  }

  static byte[] buildSerializedPartitionKey(String dbName, String tableName,
      List<String> partTypes, Object[] components, boolean endPrefix) {
    ObjectInspector javaStringOI =
        PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveCategory.STRING);
    Object[] data = new Object[components.length + 2];
    List<ObjectInspector> fois = new ArrayList<ObjectInspector>(components.length + 2);
    boolean[] endPrefixes = new boolean[components.length + 2];
    data[0] = dbName;
    fois.add(javaStringOI);
    endPrefixes[0] = false;
    data[1] = tableName;
    fois.add(javaStringOI);
    endPrefixes[1] = false;
    for (int i = 0; i < components.length; i++) {
      data[i + 2] = components[i];
      TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(partTypes.get(i));
      ObjectInspector outputOI =
          TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType);
      fois.add(outputOI);
    }
    Output output = new Output();
    try {
      BinarySortableSerDeWithEndPrefix.serializeStruct(output, data, fois, endPrefix);
    } catch (SerDeException e) {
      throw new RuntimeException("Cannot serialize partition " + StringUtils.join(components, ","));
    }
    return Arrays.copyOf(output.getData(), output.getLength());
  }

  static class StorageDescriptorParts {
    byte[] sdHash;
    String location;
    Map<String, String> parameters;
    Partition containingPartition;
    Table containingTable;
    Index containingIndex;
  }

  static void assembleStorageDescriptor(StorageDescriptor sd, StorageDescriptorParts parts) {
    SharedStorageDescriptor ssd = new SharedStorageDescriptor();
    ssd.setLocation(parts.location);
    ssd.setParameters(parts.parameters);
    ssd.setShared(sd);
    if (parts.containingPartition != null) {
      parts.containingPartition.setSd(ssd);
    } else if (parts.containingTable != null) {
      parts.containingTable.setSd(ssd);
    } else if (parts.containingIndex != null) {
      parts.containingIndex.setSd(ssd);
    } else {
      throw new RuntimeException("Need either a partition or a table");
    }
  }

  /**
   * Deserialize a partition key when you know nothing about it. That is, you do not know what
   * dbname, tablename it came from.
   * @param key the key fetched from HBase
   * @param callback A reference to the calling HBaseReadWrite object. This has to be done as a
   *                 callback because we have to first deserialize the database name and table
   *                 name, and then fetch the table information, and then we will know how to
   *                 deserialize the rest of the key.
   * @return a list that includes the dbname, tablename, and partition values
   * @throws IOException
   */
  static List<String> deserializePartitionKey(byte[] key, HBaseReadWrite callback)
      throws IOException {
    List<String> keyParts = desierliazeDbNameTableNameFromPartitionKey(key, callback.getConf());
    Table table = callback.getTable(keyParts.get(0), keyParts.get(1));
    keyParts.addAll(deserializePartitionKey(table.getPartitionKeys(), key, callback.getConf()));
    return keyParts;
  }

  /**
   * Deserialize a partition. This version should be used when the partition key is not already
   * known and the database and table name are not known either (eg a full scan). Because the
   * dbname and tablename (and thus the partition columns) are not known a priori this version
   * has to go fetch the table after it figures out which table. If you already have the table
   * object you should use
   * {@link #deserializePartition(String,String,List,byte[],byte[],Configuration)}
   * @param key the key fetched from HBase
   * @param serialized the value fetched from HBase
   * @param callback A reference to the calling HBaseReadWrite object. This has to be done as a
   *                 callback because we have to first deserialize the database name and table
   *                 name, and then fetch the table information, and then we will know how to
   *                 deserialize the rest of the key.
   * @return A struct that contains the partition plus parts of the storage descriptor
   */
  static StorageDescriptorParts deserializePartition(byte[] key, byte[] serialized,
      HBaseReadWrite callback) throws IOException {
    List<String> dbNameTableName =
        desierliazeDbNameTableNameFromPartitionKey(key, callback.getConf());
    Table table = callback.getTable(dbNameTableName.get(0), dbNameTableName.get(1));
    List<String> keys = deserializePartitionKey(table.getPartitionKeys(), key, callback.getConf());
    return deserializePartition(dbNameTableName.get(0), dbNameTableName.get(1), keys, serialized);
  }

  /**
   * Deserialize a partition. This version should be used when you know the dbname and tablename
   * but not the partition values.
   * @param dbName database this partition is in
   * @param tableName table this partition is in
   * @param partitions schemas for the partition columns of this table
   * @param key key fetched from HBase
   * @param serialized serialized version of the partition
   * @param conf configuration file
   * @return
   * @throws InvalidProtocolBufferException
   */
  static StorageDescriptorParts deserializePartition(String dbName, String tableName,
      List<FieldSchema> partitions, byte[] key, byte[] serialized, Configuration conf)
      throws InvalidProtocolBufferException {
    List<String> keys = deserializePartitionKey(partitions, key, conf);
    return deserializePartition(dbName, tableName, keys, serialized);
  }

  /**
   * Deserialize a partition. This version should be used when the partition key is
   * known (eg a get).
* @param dbName database name * @param tableName table name * @param partVals partition values * @param serialized the value fetched from HBase * @return A struct that contains the partition plus parts of the storage descriptor */ static StorageDescriptorParts deserializePartition(String dbName, String tableName, List<String> partVals, byte[] serialized) throws InvalidProtocolBufferException { HbaseMetastoreProto.Partition proto = HbaseMetastoreProto.Partition.parseFrom(serialized); Partition part = new Partition(); StorageDescriptorParts sdParts = new StorageDescriptorParts(); sdParts.containingPartition = part; part.setDbName(dbName); part.setTableName(tableName); part.setValues(partVals); part.setCreateTime((int)proto.getCreateTime()); part.setLastAccessTime((int)proto.getLastAccessTime()); if (proto.hasLocation()) sdParts.location = proto.getLocation(); if (proto.hasSdParameters()) sdParts.parameters = buildParameters(proto.getSdParameters()); sdParts.sdHash = proto.getSdHash().toByteArray(); if (proto.hasParameters()) part.setParameters(buildParameters(proto.getParameters())); return sdParts; } static String[] deserializeKey(byte[] key) { String k = new String(key, ENCODING); return k.split(KEY_SEPARATOR_STR); } private static List<String> desierliazeDbNameTableNameFromPartitionKey(byte[] key, Configuration conf) { StringBuffer names = new StringBuffer(); names.append("dbName,tableName,"); StringBuffer types = new StringBuffer(); types.append("string,string,"); BinarySortableSerDe serDe = new BinarySortableSerDe(); Properties props = new Properties(); props.setProperty(serdeConstants.LIST_COLUMNS, names.toString()); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString()); try { serDe.initialize(conf, props); List deserializedkeys = ((List)serDe.deserialize(new BytesWritable(key))).subList(0, 2); List<String> keys = new ArrayList<>(); for (int i=0;i<deserializedkeys.size();i++) { Object deserializedKey = deserializedkeys.get(i); if (deserializedKey==null) { throw new RuntimeException("Can't have a null dbname or tablename"); } else { TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString("string"); ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType); Converter converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector); keys.add((String) converter.convert(deserializedKey)); } } return keys; } catch (SerDeException e) { throw new RuntimeException("Error when deserialize key", e); } } // Deserialize a partition key and return _only_ the partition values. 
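  // Note: the key bytes were produced by buildSerializedPartitionKey as a BinarySortableSerDe
  // struct of (dbName, tableName, partVal1 ... partValN), so the first two fields are skipped
  // below and only the partition values are converted back to strings.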
private static List<String> deserializePartitionKey(List<FieldSchema> partitions, byte[] key, Configuration conf) { StringBuffer names = new StringBuffer(); names.append("dbName,tableName,"); StringBuffer types = new StringBuffer(); types.append("string,string,"); for (int i=0;i<partitions.size();i++) { names.append(partitions.get(i).getName()); types.append(TypeInfoUtils.getTypeInfoFromTypeString(partitions.get(i).getType())); if (i!=partitions.size()-1) { names.append(","); types.append(","); } } BinarySortableSerDe serDe = new BinarySortableSerDe(); Properties props = new Properties(); props.setProperty(serdeConstants.LIST_COLUMNS, names.toString()); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString()); try { serDe.initialize(conf, props); List deserializedkeys = ((List)serDe.deserialize(new BytesWritable(key))).subList(2, partitions.size()+2); List<String> partitionKeys = new ArrayList<String>(); for (int i=0;i<deserializedkeys.size();i++) { Object deserializedKey = deserializedkeys.get(i); if (deserializedKey==null) { partitionKeys.add(HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME)); } else { TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString(partitions.get(i).getType()); ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType); Converter converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector); partitionKeys.add((String)converter.convert(deserializedKey)); } } return partitionKeys; } catch (SerDeException e) { throw new RuntimeException("Error when deserialize key", e); } } /** * Serialize a table * @param table table object * @param sdHash hash that is being used as a key for the enclosed storage descriptor * @return First element is the key, second is the serialized table */ static byte[][] serializeTable(Table table, byte[] sdHash) { byte[][] result = new byte[2][]; result[0] = buildKey(HiveStringUtils.normalizeIdentifier(table.getDbName()), HiveStringUtils.normalizeIdentifier(table.getTableName())); HbaseMetastoreProto.Table.Builder builder = HbaseMetastoreProto.Table.newBuilder(); if (table.getOwner() != null) builder.setOwner(table.getOwner()); builder .setCreateTime(table.getCreateTime()) .setLastAccessTime(table.getLastAccessTime()) .setRetention(table.getRetention()); if (table.getSd().getLocation() != null) builder.setLocation(table.getSd().getLocation()); if (table.getSd().getParameters() != null) { builder.setSdParameters(buildParameters(table.getSd().getParameters())); } builder.setSdHash(ByteString.copyFrom(sdHash)); if (table.getPartitionKeys() != null) { builder.addAllPartitionKeys(convertFieldSchemaListToProto(table.getPartitionKeys())); } if (table.getParameters() != null) { builder.setParameters(buildParameters(table.getParameters())); } if (table.getViewOriginalText() != null) { builder.setViewOriginalText(table.getViewOriginalText()); } if (table.getViewExpandedText() != null) { builder.setViewExpandedText(table.getViewExpandedText()); } builder.setIsRewriteEnabled(table.isRewriteEnabled()); if (table.getTableType() != null) builder.setTableType(table.getTableType()); if (table.getPrivileges() != null) { builder.setPrivileges(buildPrincipalPrivilegeSet(table.getPrivileges())); } // Set only if table is temporary if (table.isTemporary()) { builder.setIsTemporary(table.isTemporary()); } result[1] = builder.build().toByteArray(); return result; } /** * Deserialize a table. 
This version should be used when the table key is not already * known (eg a scan). * @param key the key fetched from HBase * @param serialized the value fetched from HBase * @return A struct that contains the table plus parts of the storage descriptor */ static StorageDescriptorParts deserializeTable(byte[] key, byte[] serialized) throws InvalidProtocolBufferException { String[] keys = deserializeKey(key); return deserializeTable(keys[0], keys[1], serialized); } /** * Deserialize a table. This version should be used when the table key is * known (eg a get). * @param dbName database name * @param tableName table name * @param serialized the value fetched from HBase * @return A struct that contains the partition plus parts of the storage descriptor */ static StorageDescriptorParts deserializeTable(String dbName, String tableName, byte[] serialized) throws InvalidProtocolBufferException { HbaseMetastoreProto.Table proto = HbaseMetastoreProto.Table.parseFrom(serialized); Table table = new Table(); StorageDescriptorParts sdParts = new StorageDescriptorParts(); sdParts.containingTable = table; table.setDbName(dbName); table.setTableName(tableName); table.setOwner(proto.getOwner()); table.setCreateTime((int)proto.getCreateTime()); table.setLastAccessTime((int)proto.getLastAccessTime()); table.setRetention((int)proto.getRetention()); if (proto.hasLocation()) sdParts.location = proto.getLocation(); if (proto.hasSdParameters()) sdParts.parameters = buildParameters(proto.getSdParameters()); sdParts.sdHash = proto.getSdHash().toByteArray(); table.setPartitionKeys(convertFieldSchemaListFromProto(proto.getPartitionKeysList())); table.setParameters(buildParameters(proto.getParameters())); if (proto.hasViewOriginalText()) table.setViewOriginalText(proto.getViewOriginalText()); if (proto.hasViewExpandedText()) table.setViewExpandedText(proto.getViewExpandedText()); table.setRewriteEnabled(proto.getIsRewriteEnabled()); table.setTableType(proto.getTableType()); if (proto.hasPrivileges()) { table.setPrivileges(buildPrincipalPrivilegeSet(proto.getPrivileges())); } if (proto.hasIsTemporary()) table.setTemporary(proto.getIsTemporary()); return sdParts; } /** * Serialize an index * @param index index object * @param sdHash hash that is being used as a key for the enclosed storage descriptor * @return First element is the key, second is the serialized index */ static byte[][] serializeIndex(Index index, byte[] sdHash) { byte[][] result = new byte[2][]; result[0] = buildKey(HiveStringUtils.normalizeIdentifier(index.getDbName()), HiveStringUtils.normalizeIdentifier(index.getOrigTableName()), HiveStringUtils.normalizeIdentifier(index.getIndexName())); HbaseMetastoreProto.Index.Builder builder = HbaseMetastoreProto.Index.newBuilder(); builder.setDbName(index.getDbName()); builder.setOrigTableName(index.getOrigTableName()); if (index.getSd().getLocation() != null) builder.setLocation(index.getSd().getLocation()); if (index.getSd().getParameters() != null) { builder.setSdParameters(buildParameters(index.getSd().getParameters())); } if (index.getIndexHandlerClass() != null) { builder.setIndexHandlerClass(index.getIndexHandlerClass()); } if (index.getIndexTableName() != null) { builder.setIndexTableName(index.getIndexTableName()); } builder .setCreateTime(index.getCreateTime()) .setLastAccessTime(index.getLastAccessTime()) .setDeferredRebuild(index.isDeferredRebuild()); if (index.getParameters() != null) { builder.setParameters(buildParameters(index.getParameters())); } if (sdHash != null) { 
builder.setSdHash(ByteString.copyFrom(sdHash)); } result[1] = builder.build().toByteArray(); return result; } /** * Deserialize an index. This version should be used when the index key is not already * known (eg a scan). * @param key the key fetched from HBase * @param serialized the value fetched from HBase * @return A struct that contains the index plus parts of the storage descriptor */ static StorageDescriptorParts deserializeIndex(byte[] key, byte[] serialized) throws InvalidProtocolBufferException { String[] keys = deserializeKey(key); return deserializeIndex(keys[0], keys[1], keys[2], serialized); } /** * Deserialize an index. This version should be used when the table key is * known (eg a get). * @param dbName database name * @param origTableName original table name * @param indexName index name * @param serialized the value fetched from HBase * @return A struct that contains the index plus parts of the storage descriptor */ static StorageDescriptorParts deserializeIndex(String dbName, String origTableName, String indexName, byte[] serialized) throws InvalidProtocolBufferException { HbaseMetastoreProto.Index proto = HbaseMetastoreProto.Index.parseFrom(serialized); Index index = new Index(); StorageDescriptorParts sdParts = new StorageDescriptorParts(); sdParts.containingIndex = index; index.setDbName(dbName); index.setIndexName(indexName); index.setOrigTableName(origTableName); if (proto.hasLocation()) sdParts.location = proto.getLocation(); if (proto.hasSdParameters()) sdParts.parameters = buildParameters(proto.getSdParameters()); if (proto.hasIndexHandlerClass()) { index.setIndexHandlerClass(proto.getIndexHandlerClass()); } if (proto.hasIndexTableName()) { index.setIndexTableName(proto.getIndexTableName()); } index.setCreateTime(proto.getCreateTime()); index.setLastAccessTime(proto.getLastAccessTime()); index.setDeferredRebuild(proto.getDeferredRebuild()); index.setParameters(buildParameters(proto.getParameters())); if (proto.hasSdHash()) { sdParts.sdHash = proto.getSdHash().toByteArray(); } return sdParts; } static byte[] serializeBloomFilter(String dbName, String tableName, BloomFilter bloom) { long[] bitSet = bloom.getBitSet(); List<Long> bits = new ArrayList<>(bitSet.length); for (int i = 0; i < bitSet.length; i++) bits.add(bitSet[i]); HbaseMetastoreProto.AggrStatsBloomFilter.BloomFilter protoBloom = HbaseMetastoreProto.AggrStatsBloomFilter.BloomFilter.newBuilder() .setNumBits(bloom.getBitSize()) .setNumFuncs(bloom.getNumHashFunctions()) .addAllBits(bits) .build(); HbaseMetastoreProto.AggrStatsBloomFilter proto = HbaseMetastoreProto.AggrStatsBloomFilter.newBuilder() .setDbName(ByteString.copyFrom(dbName.getBytes(ENCODING))) .setTableName(ByteString.copyFrom(tableName.getBytes(ENCODING))) .setBloomFilter(protoBloom) .setAggregatedAt(System.currentTimeMillis()) .build(); return proto.toByteArray(); } private static HbaseMetastoreProto.ColumnStats protoBufStatsForOneColumn( ColumnStatistics partitionColumnStats, ColumnStatisticsObj colStats) throws IOException { HbaseMetastoreProto.ColumnStats.Builder builder = HbaseMetastoreProto.ColumnStats.newBuilder(); if (partitionColumnStats != null) { builder.setLastAnalyzed(partitionColumnStats.getStatsDesc().getLastAnalyzed()); } assert colStats.getColType() != null; builder.setColumnType(colStats.getColType()); assert colStats.getColName() != null; builder.setColumnName(colStats.getColName()); ColumnStatisticsData colData = colStats.getStatsData(); switch (colData.getSetField()) { case BOOLEAN_STATS: BooleanColumnStatsData boolData = 
colData.getBooleanStats(); builder.setNumNulls(boolData.getNumNulls()); builder.setBoolStats(HbaseMetastoreProto.ColumnStats.BooleanStats.newBuilder() .setNumTrues(boolData.getNumTrues()).setNumFalses(boolData.getNumFalses()).build()); break; case LONG_STATS: LongColumnStatsData longData = colData.getLongStats(); builder.setNumNulls(longData.getNumNulls()); builder.setNumDistinctValues(longData.getNumDVs()); if (longData.isSetBitVectors()) { builder.setBitVectors(longData.getBitVectors()); } builder.setLongStats(HbaseMetastoreProto.ColumnStats.LongStats.newBuilder() .setLowValue(longData.getLowValue()).setHighValue(longData.getHighValue()).build()); break; case DOUBLE_STATS: DoubleColumnStatsData doubleData = colData.getDoubleStats(); builder.setNumNulls(doubleData.getNumNulls()); builder.setNumDistinctValues(doubleData.getNumDVs()); if (doubleData.isSetBitVectors()) { builder.setBitVectors(doubleData.getBitVectors()); } builder.setDoubleStats(HbaseMetastoreProto.ColumnStats.DoubleStats.newBuilder() .setLowValue(doubleData.getLowValue()).setHighValue(doubleData.getHighValue()).build()); break; case STRING_STATS: StringColumnStatsData stringData = colData.getStringStats(); builder.setNumNulls(stringData.getNumNulls()); builder.setNumDistinctValues(stringData.getNumDVs()); if (stringData.isSetBitVectors()) { builder.setBitVectors(stringData.getBitVectors()); } builder.setStringStats(HbaseMetastoreProto.ColumnStats.StringStats.newBuilder() .setMaxColLength(stringData.getMaxColLen()).setAvgColLength(stringData.getAvgColLen()) .build()); break; case BINARY_STATS: BinaryColumnStatsData binaryData = colData.getBinaryStats(); builder.setNumNulls(binaryData.getNumNulls()); builder.setBinaryStats(HbaseMetastoreProto.ColumnStats.StringStats.newBuilder() .setMaxColLength(binaryData.getMaxColLen()).setAvgColLength(binaryData.getAvgColLen()) .build()); break; case DECIMAL_STATS: DecimalColumnStatsData decimalData = colData.getDecimalStats(); builder.setNumNulls(decimalData.getNumNulls()); builder.setNumDistinctValues(decimalData.getNumDVs()); if (decimalData.isSetBitVectors()) { builder.setBitVectors(decimalData.getBitVectors()); } if (decimalData.getLowValue() != null && decimalData.getHighValue() != null) { builder.setDecimalStats( HbaseMetastoreProto.ColumnStats.DecimalStats .newBuilder() .setLowValue( HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder() .setUnscaled(ByteString.copyFrom(decimalData.getLowValue().getUnscaled())) .setScale(decimalData.getLowValue().getScale()).build()) .setHighValue( HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder() .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled())) .setScale(decimalData.getHighValue().getScale()).build())).build(); } else { builder.setDecimalStats(HbaseMetastoreProto.ColumnStats.DecimalStats.newBuilder().clear() .build()); } break; default: throw new RuntimeException("Woh, bad. 
Unknown stats type!"); } return builder.build(); } static byte[] serializeStatsForOneColumn(ColumnStatistics partitionColumnStats, ColumnStatisticsObj colStats) throws IOException { return protoBufStatsForOneColumn(partitionColumnStats, colStats).toByteArray(); } static ColumnStatisticsObj deserializeStatsForOneColumn(ColumnStatistics partitionColumnStats, byte[] bytes) throws IOException { HbaseMetastoreProto.ColumnStats proto = HbaseMetastoreProto.ColumnStats.parseFrom(bytes); return statsForOneColumnFromProtoBuf(partitionColumnStats, proto); } private static ColumnStatisticsObj statsForOneColumnFromProtoBuf(ColumnStatistics partitionColumnStats, HbaseMetastoreProto.ColumnStats proto) throws IOException { ColumnStatisticsObj colStats = new ColumnStatisticsObj(); long lastAnalyzed = proto.getLastAnalyzed(); if (partitionColumnStats != null) { partitionColumnStats.getStatsDesc().setLastAnalyzed( Math.max(lastAnalyzed, partitionColumnStats.getStatsDesc().getLastAnalyzed())); } colStats.setColType(proto.getColumnType()); colStats.setColName(proto.getColumnName()); ColumnStatisticsData colData = new ColumnStatisticsData(); if (proto.hasBoolStats()) { BooleanColumnStatsData boolData = new BooleanColumnStatsData(); boolData.setNumTrues(proto.getBoolStats().getNumTrues()); boolData.setNumFalses(proto.getBoolStats().getNumFalses()); boolData.setNumNulls(proto.getNumNulls()); colData.setBooleanStats(boolData); } else if (proto.hasLongStats()) { LongColumnStatsData longData = new LongColumnStatsData(); if (proto.getLongStats().hasLowValue()) { longData.setLowValue(proto.getLongStats().getLowValue()); } if (proto.getLongStats().hasHighValue()) { longData.setHighValue(proto.getLongStats().getHighValue()); } longData.setNumNulls(proto.getNumNulls()); longData.setNumDVs(proto.getNumDistinctValues()); longData.setBitVectors(proto.getBitVectors()); colData.setLongStats(longData); } else if (proto.hasDoubleStats()) { DoubleColumnStatsData doubleData = new DoubleColumnStatsData(); if (proto.getDoubleStats().hasLowValue()) { doubleData.setLowValue(proto.getDoubleStats().getLowValue()); } if (proto.getDoubleStats().hasHighValue()) { doubleData.setHighValue(proto.getDoubleStats().getHighValue()); } doubleData.setNumNulls(proto.getNumNulls()); doubleData.setNumDVs(proto.getNumDistinctValues()); doubleData.setBitVectors(proto.getBitVectors()); colData.setDoubleStats(doubleData); } else if (proto.hasStringStats()) { StringColumnStatsData stringData = new StringColumnStatsData(); stringData.setMaxColLen(proto.getStringStats().getMaxColLength()); stringData.setAvgColLen(proto.getStringStats().getAvgColLength()); stringData.setNumNulls(proto.getNumNulls()); stringData.setNumDVs(proto.getNumDistinctValues()); stringData.setBitVectors(proto.getBitVectors()); colData.setStringStats(stringData); } else if (proto.hasBinaryStats()) { BinaryColumnStatsData binaryData = new BinaryColumnStatsData(); binaryData.setMaxColLen(proto.getBinaryStats().getMaxColLength()); binaryData.setAvgColLen(proto.getBinaryStats().getAvgColLength()); binaryData.setNumNulls(proto.getNumNulls()); colData.setBinaryStats(binaryData); } else if (proto.hasDecimalStats()) { DecimalColumnStatsData decimalData = new DecimalColumnStatsData(); if (proto.getDecimalStats().hasHighValue()) { Decimal hiVal = new Decimal(); hiVal.setUnscaled(proto.getDecimalStats().getHighValue().getUnscaled().toByteArray()); hiVal.setScale((short) proto.getDecimalStats().getHighValue().getScale()); decimalData.setHighValue(hiVal); } if (proto.getDecimalStats().hasLowValue()) 
{ Decimal loVal = new Decimal(); loVal.setUnscaled(proto.getDecimalStats().getLowValue().getUnscaled().toByteArray()); loVal.setScale((short) proto.getDecimalStats().getLowValue().getScale()); decimalData.setLowValue(loVal); } decimalData.setNumNulls(proto.getNumNulls()); decimalData.setNumDVs(proto.getNumDistinctValues()); decimalData.setBitVectors(proto.getBitVectors()); colData.setDecimalStats(decimalData); } else { throw new RuntimeException("Woh, bad. Unknown stats type!"); } colStats.setStatsData(colData); return colStats; } static byte[] serializeAggrStats(AggrStats aggrStats) throws IOException { List<HbaseMetastoreProto.ColumnStats> protoColStats = new ArrayList<>(aggrStats.getColStatsSize()); for (ColumnStatisticsObj cso : aggrStats.getColStats()) { protoColStats.add(protoBufStatsForOneColumn(null, cso)); } return HbaseMetastoreProto.AggrStats.newBuilder() .setPartsFound(aggrStats.getPartsFound()) .addAllColStats(protoColStats) .build() .toByteArray(); } static AggrStats deserializeAggrStats(byte[] serialized) throws IOException { HbaseMetastoreProto.AggrStats protoAggrStats = HbaseMetastoreProto.AggrStats.parseFrom(serialized); AggrStats aggrStats = new AggrStats(); aggrStats.setPartsFound(protoAggrStats.getPartsFound()); for (HbaseMetastoreProto.ColumnStats protoCS : protoAggrStats.getColStatsList()) { aggrStats.addToColStats(statsForOneColumnFromProtoBuf(null, protoCS)); } return aggrStats; } /** * Serialize a delegation token * @param tokenIdentifier * @param delegationToken * @return two byte arrays, first contains the key, the second the serialized value. */ static byte[][] serializeDelegationToken(String tokenIdentifier, String delegationToken) { byte[][] result = new byte[2][]; result[0] = buildKey(tokenIdentifier); result[1] = HbaseMetastoreProto.DelegationToken.newBuilder() .setTokenStr(delegationToken) .build() .toByteArray(); return result; } /** * Deserialize a delegation token. * @param value value fetched from hbase * @return A delegation token. * @throws InvalidProtocolBufferException */ static String deserializeDelegationToken(byte[] value) throws InvalidProtocolBufferException { HbaseMetastoreProto.DelegationToken protoToken = HbaseMetastoreProto.DelegationToken.parseFrom(value); return protoToken.getTokenStr(); } /** * Serialize a master key * @param seqNo * @param key * @return two byte arrays, first contains the key, the second the serialized value. */ static byte[][] serializeMasterKey(Integer seqNo, String key) { byte[][] result = new byte[2][]; result[0] = buildKey(seqNo.toString()); result[1] = HbaseMetastoreProto.MasterKey.newBuilder() .setMasterKey(key) .build() .toByteArray(); return result; } /** * Deserialize a master key. * @param value value fetched from hbase * @return A master key * @throws InvalidProtocolBufferException */ static String deserializeMasterKey(byte[] value) throws InvalidProtocolBufferException { HbaseMetastoreProto.MasterKey protoKey = HbaseMetastoreProto.MasterKey.parseFrom(value); return protoKey.getMasterKey(); } /** * Serialize the primary key for a table. * @param pk Primary key columns. It is expected that all of these match to one pk, since * anything else is meaningless. * @return two byte arrays, first containts the hbase key, the second the serialized value. */ static byte[][] serializePrimaryKey(List<SQLPrimaryKey> pk) { // First, figure out the dbName and tableName. We expect this to match for all list entries. 
    byte[][] result = new byte[2][];
    String dbName = pk.get(0).getTable_db();
    String tableName = pk.get(0).getTable_name();
    result[0] = buildKey(HiveStringUtils.normalizeIdentifier(dbName),
        HiveStringUtils.normalizeIdentifier(tableName));

    HbaseMetastoreProto.PrimaryKey.Builder builder = HbaseMetastoreProto.PrimaryKey.newBuilder();

    // Encode the constraint-level metadata; it is the same for every column entry, so take it
    // from the first one.
    builder.setPkName(pk.get(0).getPk_name());
    builder.setEnableConstraint(pk.get(0).isEnable_cstr());
    builder.setValidateConstraint(pk.get(0).isValidate_cstr());
    builder.setRelyConstraint(pk.get(0).isRely_cstr());

    for (SQLPrimaryKey pkcol : pk) {
      HbaseMetastoreProto.PrimaryKey.PrimaryKeyColumn.Builder pkColBuilder =
          HbaseMetastoreProto.PrimaryKey.PrimaryKeyColumn.newBuilder();
      pkColBuilder.setColumnName(pkcol.getColumn_name());
      pkColBuilder.setKeySeq(pkcol.getKey_seq());
      builder.addCols(pkColBuilder);
    }

    result[1] = builder.build().toByteArray();
    return result;
  }

  /**
   * Serialize the foreign key(s) for a table.
   * @param fks Foreign key columns.  These may belong to multiple foreign keys.
   * @return two byte arrays, the first contains the key, the second the serialized value.
   */
  static byte[][] serializeForeignKeys(List<SQLForeignKey> fks) {
    // First, figure out the dbName and tableName. We expect these to match for all list entries.
    byte[][] result = new byte[2][];
    String dbName = fks.get(0).getFktable_db();
    String tableName = fks.get(0).getFktable_name();
    result[0] = buildKey(HiveStringUtils.normalizeIdentifier(dbName),
        HiveStringUtils.normalizeIdentifier(tableName));

    HbaseMetastoreProto.ForeignKeys.Builder builder = HbaseMetastoreProto.ForeignKeys.newBuilder();

    // Encode any foreign keys we find. This can be complex because there may be more than
    // one foreign key in here, so we need to detect that.
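    // Group the flat list of SQLForeignKey rows by constraint name: the first row seen for a
    // given fk_name creates a ForeignKey builder carrying the constraint-level metadata, and
    // every row contributes one column entry to its constraint's builder.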
    Map<String, HbaseMetastoreProto.ForeignKeys.ForeignKey.Builder> fkBuilders = new HashMap<>();
    for (SQLForeignKey fkcol : fks) {
      HbaseMetastoreProto.ForeignKeys.ForeignKey.Builder fkBuilder =
          fkBuilders.get(fkcol.getFk_name());
      if (fkBuilder == null) {
        // We haven't seen this key before, so add it
        fkBuilder = HbaseMetastoreProto.ForeignKeys.ForeignKey.newBuilder();
        fkBuilder.setFkName(fkcol.getFk_name());
        fkBuilder.setReferencedDbName(fkcol.getPktable_db());
        assert dbName.equals(fkcol.getFktable_db()) : "You switched databases on me!";
        fkBuilder.setReferencedTableName(fkcol.getPktable_name());
        assert tableName.equals(fkcol.getFktable_name()) : "You switched tables on me!";
        fkBuilder.setReferencedPkName(fkcol.getPk_name());
        fkBuilder.setUpdateRule(fkcol.getUpdate_rule());
        fkBuilder.setDeleteRule(fkcol.getDelete_rule());
        fkBuilder.setEnableConstraint(fkcol.isEnable_cstr());
        fkBuilder.setValidateConstraint(fkcol.isValidate_cstr());
        fkBuilder.setRelyConstraint(fkcol.isRely_cstr());
        fkBuilders.put(fkcol.getFk_name(), fkBuilder);
      }

      HbaseMetastoreProto.ForeignKeys.ForeignKey.ForeignKeyColumn.Builder fkColBuilder =
          HbaseMetastoreProto.ForeignKeys.ForeignKey.ForeignKeyColumn.newBuilder();
      fkColBuilder.setColumnName(fkcol.getFkcolumn_name());
      fkColBuilder.setReferencedColumnName(fkcol.getPkcolumn_name());
      fkColBuilder.setKeySeq(fkcol.getKey_seq());
      fkBuilder.addCols(fkColBuilder);
    }

    for (HbaseMetastoreProto.ForeignKeys.ForeignKey.Builder fkBuilder : fkBuilders.values()) {
      builder.addFks(fkBuilder);
    }
    result[1] = builder.build().toByteArray();
    return result;
  }

  static List<SQLPrimaryKey> deserializePrimaryKey(String dbName, String tableName, byte[] value)
      throws InvalidProtocolBufferException {
    HbaseMetastoreProto.PrimaryKey proto = HbaseMetastoreProto.PrimaryKey.parseFrom(value);
    List<SQLPrimaryKey> result = new ArrayList<>();
    for (HbaseMetastoreProto.PrimaryKey.PrimaryKeyColumn protoPkCol : proto.getColsList()) {
      result.add(new SQLPrimaryKey(dbName, tableName, protoPkCol.getColumnName(),
          protoPkCol.getKeySeq(), proto.getPkName(), proto.getEnableConstraint(),
          proto.getValidateConstraint(), proto.getRelyConstraint()));
    }
    return result;
  }

  static List<SQLForeignKey> deserializeForeignKeys(String dbName, String tableName, byte[] value)
      throws InvalidProtocolBufferException {
    List<SQLForeignKey> result = new ArrayList<>();
    HbaseMetastoreProto.ForeignKeys protoConstraints =
        HbaseMetastoreProto.ForeignKeys.parseFrom(value);
    for (HbaseMetastoreProto.ForeignKeys.ForeignKey protoFk : protoConstraints.getFksList()) {
      for (HbaseMetastoreProto.ForeignKeys.ForeignKey.ForeignKeyColumn protoFkCol :
          protoFk.getColsList()) {
        result.add(new SQLForeignKey(protoFk.getReferencedDbName(),
            protoFk.getReferencedTableName(), protoFkCol.getReferencedColumnName(), dbName,
            tableName, protoFkCol.getColumnName(), protoFkCol.getKeySeq(),
            protoFk.getUpdateRule(), protoFk.getDeleteRule(), protoFk.getFkName(),
            protoFk.getReferencedPkName(), protoFk.getEnableConstraint(),
            protoFk.getValidateConstraint(), protoFk.getRelyConstraint()));
      }
    }
    return result;
  }

  /**
   * Compute an end key for an hbase prefix scan.
   * @param keyStart byte array representing the start prefix
   * @return byte array corresponding to the next possible prefix
   */
  static byte[] getEndPrefix(byte[] keyStart) {
    if (keyStart == null) {
      return null;
    }
    // Since this is a prefix and not a full key, the usual hbase technique of
    // appending a 0 byte does not work. Instead of that, increment the last byte.
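    // For example, a prefix of {0x01, 0x02} yields {0x01, 0x03}, so a scan over [start, end)
    // covers every key that begins with the prefix. Note (assumption, not enforced here): if the
    // last byte is 0xFF the increment wraps to 0x00, so callers are expected not to pass
    // prefixes ending in 0xFF.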
    byte[] keyEnd = Arrays.copyOf(keyStart, keyStart.length);
    keyEnd[keyEnd.length - 1]++;
    return keyEnd;
  }

  // Encode a long as an 8 byte array in big-endian order (most significant byte first).
  static byte[] makeLongKey(long v) {
    byte[] b = new byte[8];
    b[0] = (byte)(v >>> 56);
    b[1] = (byte)(v >>> 48);
    b[2] = (byte)(v >>> 40);
    b[3] = (byte)(v >>> 32);
    b[4] = (byte)(v >>> 24);
    b[5] = (byte)(v >>> 16);
    b[6] = (byte)(v >>> 8);
    b[7] = (byte)(v >>> 0);
    return b;
  }

  // Convert a thrift Decimal (unscaled bytes plus scale) into a double.
  public static double getDoubleValue(Decimal decimal) {
    return new BigDecimal(new BigInteger(decimal.getUnscaled()),
        decimal.getScale()).doubleValue();
  }
}
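
// Illustrative usage (a minimal sketch, not part of the original class): how a caller might
// combine the key/value helpers above. The sequence number and key string are made up.
//
//   byte[][] kv = HBaseUtils.serializeMasterKey(42, "example-master-key");
//   // kv[0] is the hbase row key built from the sequence number, kv[1] the protobuf value
//   String restored = HBaseUtils.deserializeMasterKey(kv[1]);   // "example-master-key"
//
//   // An 8 byte big-endian row key for a long id:
//   byte[] rowKey = HBaseUtils.makeLongKey(1234L);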