/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.hadoop.hive.metastore.hbase; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Iterators; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; import org.apache.hadoop.hive.metastore.api.SQLForeignKey; import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HTableInterface; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Row; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Function; import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.Role; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.hbase.PartitionKeyComparator.Operator; import org.apache.hive.common.util.BloomFilter; import org.apache.thrift.TBase; import org.apache.thrift.TException; import org.apache.thrift.protocol.TProtocol; import org.apache.thrift.protocol.TSimpleJSONProtocol; import org.apache.thrift.transport.TMemoryBuffer; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import 
java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.Set; /** * Class to manage storing object in and reading them from HBase. */ public class HBaseReadWrite implements MetadataStore { final static String AGGR_STATS_TABLE = "HBMS_AGGR_STATS"; final static String DB_TABLE = "HBMS_DBS"; final static String FUNC_TABLE = "HBMS_FUNCS"; final static String GLOBAL_PRIVS_TABLE = "HBMS_GLOBAL_PRIVS"; final static String PART_TABLE = "HBMS_PARTITIONS"; final static String ROLE_TABLE = "HBMS_ROLES"; final static String SD_TABLE = "HBMS_SDS"; final static String SECURITY_TABLE = "HBMS_SECURITY"; final static String SEQUENCES_TABLE = "HBMS_SEQUENCES"; final static String TABLE_TABLE = "HBMS_TBLS"; final static String INDEX_TABLE = "HBMS_INDEX"; final static String USER_TO_ROLE_TABLE = "HBMS_USER_TO_ROLE"; final static String FILE_METADATA_TABLE = "HBMS_FILE_METADATA"; final static byte[] CATALOG_CF = "c".getBytes(HBaseUtils.ENCODING); final static byte[] STATS_CF = "s".getBytes(HBaseUtils.ENCODING); final static String NO_CACHE_CONF = "no.use.cache"; /** * List of tables in HBase */ public final static String[] tableNames = { AGGR_STATS_TABLE, DB_TABLE, FUNC_TABLE, GLOBAL_PRIVS_TABLE, PART_TABLE, USER_TO_ROLE_TABLE, ROLE_TABLE, SD_TABLE, SECURITY_TABLE, SEQUENCES_TABLE, TABLE_TABLE, INDEX_TABLE, FILE_METADATA_TABLE }; public final static Map<String, List<byte[]>> columnFamilies = new HashMap<> (tableNames.length); static { columnFamilies.put(AGGR_STATS_TABLE, Arrays.asList(CATALOG_CF)); columnFamilies.put(DB_TABLE, Arrays.asList(CATALOG_CF)); columnFamilies.put(FUNC_TABLE, Arrays.asList(CATALOG_CF)); columnFamilies.put(GLOBAL_PRIVS_TABLE, Arrays.asList(CATALOG_CF)); columnFamilies.put(PART_TABLE, Arrays.asList(CATALOG_CF, STATS_CF)); columnFamilies.put(USER_TO_ROLE_TABLE, Arrays.asList(CATALOG_CF)); columnFamilies.put(ROLE_TABLE, Arrays.asList(CATALOG_CF)); columnFamilies.put(SD_TABLE, Arrays.asList(CATALOG_CF)); columnFamilies.put(SECURITY_TABLE, Arrays.asList(CATALOG_CF)); columnFamilies.put(SEQUENCES_TABLE, Arrays.asList(CATALOG_CF)); columnFamilies.put(TABLE_TABLE, Arrays.asList(CATALOG_CF, STATS_CF)); columnFamilies.put(INDEX_TABLE, Arrays.asList(CATALOG_CF, STATS_CF)); // Stats CF will contain PPD stats. columnFamilies.put(FILE_METADATA_TABLE, Arrays.asList(CATALOG_CF, STATS_CF)); } final static byte[] MASTER_KEY_SEQUENCE = "master_key".getBytes(HBaseUtils.ENCODING); // The change version functionality uses the sequences table, but we don't want to give the // caller complete control over the sequence name as they might inadvertently clash with one of // our sequence keys, so add a prefix to their topic name. 
final static byte[] AGGR_STATS_BLOOM_COL = "b".getBytes(HBaseUtils.ENCODING); private final static byte[] AGGR_STATS_STATS_COL = "s".getBytes(HBaseUtils.ENCODING); private final static byte[] CATALOG_COL = "c".getBytes(HBaseUtils.ENCODING); private final static byte[] ROLES_COL = "roles".getBytes(HBaseUtils.ENCODING); private final static byte[] REF_COUNT_COL = "ref".getBytes(HBaseUtils.ENCODING); private final static byte[] DELEGATION_TOKEN_COL = "dt".getBytes(HBaseUtils.ENCODING); private final static byte[] MASTER_KEY_COL = "mk".getBytes(HBaseUtils.ENCODING); private final static byte[] PRIMARY_KEY_COL = "pk".getBytes(HBaseUtils.ENCODING); private final static byte[] FOREIGN_KEY_COL = "fk".getBytes(HBaseUtils.ENCODING); private final static byte[] GLOBAL_PRIVS_KEY = "gp".getBytes(HBaseUtils.ENCODING); private final static byte[] SEQUENCES_KEY = "seq".getBytes(HBaseUtils.ENCODING); private final static int TABLES_TO_CACHE = 10; // False positives are very bad here because they cause us to invalidate entries we shouldn't. // Space used and # of hash functions grows in proportion to ln of num bits so a 10x increase // in accuracy doubles the required space and number of hash functions. private final static double STATS_BF_ERROR_RATE = 0.001; @VisibleForTesting final static String TEST_CONN = "test_connection"; private static HBaseConnection testConn; static final private Logger LOG = LoggerFactory.getLogger(HBaseReadWrite.class.getName()); private static ThreadLocal<HBaseReadWrite> self = new ThreadLocal<HBaseReadWrite>() { @Override protected HBaseReadWrite initialValue() { if (staticConf == null) { throw new RuntimeException("Attempt to create HBaseReadWrite with no configuration set"); } return new HBaseReadWrite(staticConf); } }; private static boolean tablesCreated = false; private static Configuration staticConf = null; private final Configuration conf; private HBaseConnection conn; private MessageDigest md; private ObjectCache<ObjectPair<String, String>, Table> tableCache; private ObjectCache<ByteArrayWrapper, StorageDescriptor> sdCache; private PartitionCache partCache; private StatsCache statsCache; private Counter tableHits; private Counter tableMisses; private Counter tableOverflows; private Counter partHits; private Counter partMisses; private Counter partOverflows; private Counter sdHits; private Counter sdMisses; private Counter sdOverflows; private List<Counter> counters; // roleCache doesn't use ObjectCache because I don't want to limit the size. I am assuming // that the number of roles will always be small (< 100) so caching the whole thing should not // be painful. private final Map<String, HbaseMetastoreProto.RoleGrantInfoList> roleCache; boolean entireRoleTableInCache; /** * Set the configuration for all HBaseReadWrite instances. * @param configuration Configuration object */ public static synchronized void setConf(Configuration configuration) { if (staticConf == null) { staticConf = configuration; } else { LOG.info("Attempt to set conf when it has already been set."); } } /** * Get the instance of HBaseReadWrite for the current thread. This can only be called after * {@link #setConf} has been called. Woe betide you if that's not the case. 
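   * <p>A minimal bootstrap sketch for a hypothetical caller in the same package (the
   * configuration value and exception handling are omitted; this is illustrative only,
   * not the canonical call sequence):
   * <pre>{@code
   *   Configuration conf = ...;            // metastore/HBase configuration, set up elsewhere
   *   HBaseReadWrite.setConf(conf);        // must happen once before any getInstance() call
   *   HBaseReadWrite hrw = HBaseReadWrite.getInstance();
   *   Database db = hrw.getDb("default");  // returns null if the database does not exist
   * }</pre>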
* @return thread's instance of HBaseReadWrite */ static HBaseReadWrite getInstance() { if (staticConf == null) { throw new RuntimeException("Must set conf object before getting an instance"); } return self.get(); } public Configuration getConf() { return conf; } private HBaseReadWrite(Configuration configuration) { conf = configuration; HBaseConfiguration.addHbaseResources(conf); try { String connClass = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CONNECTION_CLASS); if (TEST_CONN.equals(connClass)) { conn = testConn; LOG.debug("Using test connection."); } else { LOG.debug("Instantiating connection class " + connClass); Class c = Class.forName(connClass); Object o = c.newInstance(); if (HBaseConnection.class.isAssignableFrom(o.getClass())) { conn = (HBaseConnection) o; } else { throw new IOException(connClass + " is not an instance of HBaseConnection."); } conn.setConf(conf); conn.connect(); } } catch (Exception e) { throw new RuntimeException(e); } try { md = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException e) { throw new RuntimeException(e); } int totalCatalogObjectsToCache = HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CATALOG_CACHE_SIZE); tableHits = new Counter("table cache hits"); tableMisses = new Counter("table cache misses"); tableOverflows = new Counter("table cache overflows"); partHits = new Counter("partition cache hits"); partMisses = new Counter("partition cache misses"); partOverflows = new Counter("partition cache overflows"); sdHits = new Counter("storage descriptor cache hits"); sdMisses = new Counter("storage descriptor cache misses"); sdOverflows = new Counter("storage descriptor cache overflows"); counters = new ArrayList<>(); counters.add(tableHits); counters.add(tableMisses); counters.add(tableOverflows); counters.add(partHits); counters.add(partMisses); counters.add(partOverflows); counters.add(sdHits); counters.add(sdMisses); counters.add(sdOverflows); // Give 1% of catalog cache space to storage descriptors // (storage descriptors are shared, so 99% should be the same for a given table) int sdsCacheSize = totalCatalogObjectsToCache / 100; if (conf.getBoolean(NO_CACHE_CONF, false)) { tableCache = new BogusObjectCache<>(); sdCache = new BogusObjectCache<>(); partCache = new BogusPartitionCache(); } else { tableCache = new ObjectCache<>(TABLES_TO_CACHE, tableHits, tableMisses, tableOverflows); sdCache = new ObjectCache<>(sdsCacheSize, sdHits, sdMisses, sdOverflows); partCache = new PartitionCache(totalCatalogObjectsToCache, partHits, partMisses, partOverflows); } statsCache = StatsCache.getInstance(conf); roleCache = new HashMap<>(); entireRoleTableInCache = false; } // Synchronize this so not everyone's doing it at once. 
static synchronized void createTablesIfNotExist() throws IOException { if (!tablesCreated) { for (String name : tableNames) { if (self.get().conn.getHBaseTable(name, true) == null) { List<byte[]> families = columnFamilies.get(name); self.get().conn.createHBaseTable(name, families); } } tablesCreated = true; } } /********************************************************************************************** * Transaction related methods *********************************************************************************************/ /** * Begin a transaction */ void begin() { try { conn.beginTransaction(); } catch (IOException e) { throw new RuntimeException(e); } } /** * Commit a transaction */ void commit() { try { conn.commitTransaction(); } catch (IOException e) { throw new RuntimeException(e); } } void rollback() { try { conn.rollbackTransaction(); } catch (IOException e) { throw new RuntimeException(e); } } void close() throws IOException { conn.close(); } /********************************************************************************************** * Database related methods *********************************************************************************************/ /** * Fetch a database object * @param name name of the database to fetch * @return the database object, or null if there is no such database * @throws IOException */ Database getDb(String name) throws IOException { byte[] key = HBaseUtils.buildKey(name); byte[] serialized = read(DB_TABLE, key, CATALOG_CF, CATALOG_COL); if (serialized == null) return null; return HBaseUtils.deserializeDatabase(name, serialized); } /** * Get a list of databases. * @param regex Regular expression to use in searching for database names. It is expected to * be a Java regular expression. If it is null then all databases will be returned. * @return list of databases matching the regular expression. * @throws IOException */ List<Database> scanDatabases(String regex) throws IOException { Filter filter = null; if (regex != null) { filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex)); } Iterator<Result> iter = scan(DB_TABLE, CATALOG_CF, CATALOG_COL, filter); List<Database> databases = new ArrayList<>(); while (iter.hasNext()) { Result result = iter.next(); databases.add(HBaseUtils.deserializeDatabase(result.getRow(), result.getValue(CATALOG_CF, CATALOG_COL))); } return databases; } /** * Store a database object * @param database database object to store * @throws IOException */ void putDb(Database database) throws IOException { byte[][] serialized = HBaseUtils.serializeDatabase(database); store(DB_TABLE, serialized[0], CATALOG_CF, CATALOG_COL, serialized[1]); } /** * Drop a database * @param name name of db to drop * @throws IOException */ void deleteDb(String name) throws IOException { byte[] key = HBaseUtils.buildKey(name); delete(DB_TABLE, key, null, null); } /** * Print out the database. Intended for use by {@link org.apache.hadoop.hive.metastore.hbase.HBaseSchemaTool} * @param name name of database to print * @return string printout of database */ String printDatabase(String name) throws IOException, TException { Database db = getDb(name); if (db == null) return noSuch(name, "database"); else return dumpThriftObject(db); } /** * Print out databases. 
   * @param regex regular expression to use to search for databases
   * @return databases as strings, one per database
   * @throws IOException
   * @throws TException
   */
  List<String> printDatabases(String regex) throws IOException, TException {
    List<Database> dbs = scanDatabases(regex);
    if (dbs.size() == 0) {
      return noMatch(regex, "database");
    } else {
      List<String> lines = new ArrayList<>();
      for (Database db : dbs) lines.add(dumpThriftObject(db));
      return lines;
    }
  }

  int getDatabaseCount() throws IOException {
    Filter fil = new FirstKeyOnlyFilter();
    Iterator<Result> iter = scan(DB_TABLE, fil);
    return Iterators.size(iter);
  }

  /**********************************************************************************************
   * Function related methods
   *********************************************************************************************/

  /**
   * Fetch a function object
   * @param dbName name of the database the function is in
   * @param functionName name of the function to fetch
   * @return the function object, or null if there is no such function
   * @throws IOException
   */
  Function getFunction(String dbName, String functionName) throws IOException {
    byte[] key = HBaseUtils.buildKey(dbName, functionName);
    byte[] serialized = read(FUNC_TABLE, key, CATALOG_CF, CATALOG_COL);
    if (serialized == null) return null;
    return HBaseUtils.deserializeFunction(dbName, functionName, serialized);
  }

  /**
   * Get a list of functions.
   * @param dbName Name of the database to search in.
   * @param regex Regular expression to use in searching for function names. It is expected to
   *              be a Java regular expression. If it is null then all functions will be returned.
   * @return list of functions matching the regular expression.
   * @throws IOException
   */
  List<Function> scanFunctions(String dbName, String regex) throws IOException {
    byte[] keyPrefix = null;
    if (dbName != null) {
      keyPrefix = HBaseUtils.buildKeyWithTrailingSeparator(dbName);
    }
    Filter filter = null;
    if (regex != null) {
      filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex));
    }
    Iterator<Result> iter = scan(FUNC_TABLE, keyPrefix, HBaseUtils.getEndPrefix(keyPrefix),
        CATALOG_CF, CATALOG_COL, filter);
    List<Function> functions = new ArrayList<>();
    while (iter.hasNext()) {
      Result result = iter.next();
      functions.add(HBaseUtils.deserializeFunction(result.getRow(),
          result.getValue(CATALOG_CF, CATALOG_COL)));
    }
    return functions;
  }

  /**
   * Store a function object
   * @param function function object to store
   * @throws IOException
   */
  void putFunction(Function function) throws IOException {
    byte[][] serialized = HBaseUtils.serializeFunction(function);
    store(FUNC_TABLE, serialized[0], CATALOG_CF, CATALOG_COL, serialized[1]);
  }

  /**
   * Drop a function
   * @param dbName name of database the function is in
   * @param functionName name of function to drop
   * @throws IOException
   */
  void deleteFunction(String dbName, String functionName) throws IOException {
    byte[] key = HBaseUtils.buildKey(dbName, functionName);
    delete(FUNC_TABLE, key, null, null);
  }

  /**
   * Print out a function
   * @param key key to get the function, must include dbname.
   * @return string of the function
   * @throws IOException
   * @throws TException
   */
  String printFunction(String key) throws IOException, TException {
    byte[] k = HBaseUtils.buildKey(key);
    byte[] serialized = read(FUNC_TABLE, k, CATALOG_CF, CATALOG_COL);
    if (serialized == null) return noSuch(key, "function");
    Function func = HBaseUtils.deserializeFunction(k, serialized);
    return dumpThriftObject(func);
  }

  /**
   * Print out functions
   * @param regex regular expression to use in matching functions
   * @return list of strings, one function each
   * @throws IOException
   * @throws TException
   */
  List<String> printFunctions(String regex) throws IOException, TException {
    Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex));
    Iterator<Result> iter = scan(FUNC_TABLE, null, null, CATALOG_CF, CATALOG_COL, filter);
    List<String> lines = new ArrayList<>();
    while (iter.hasNext()) {
      Result result = iter.next();
      lines.add(dumpThriftObject(HBaseUtils.deserializeFunction(result.getRow(),
          result.getValue(CATALOG_CF, CATALOG_COL))));
    }
    if (lines.size() == 0) lines = noMatch(regex, "function");
    return lines;
  }

  /**********************************************************************************************
   * Global privilege related methods
   *********************************************************************************************/

  /**
   * Fetch the global privileges object
   * @return the global privileges object, or null if none has been stored
   * @throws IOException
   */
  PrincipalPrivilegeSet getGlobalPrivs() throws IOException {
    byte[] key = GLOBAL_PRIVS_KEY;
    byte[] serialized = read(GLOBAL_PRIVS_TABLE, key, CATALOG_CF, CATALOG_COL);
    if (serialized == null) return null;
    return HBaseUtils.deserializePrincipalPrivilegeSet(serialized);
  }

  /**
   * Store the global privileges object
   * @throws IOException
   */
  void putGlobalPrivs(PrincipalPrivilegeSet privs) throws IOException {
    byte[] key = GLOBAL_PRIVS_KEY;
    byte[] serialized = HBaseUtils.serializePrincipalPrivilegeSet(privs);
    store(GLOBAL_PRIVS_TABLE, key, CATALOG_CF, CATALOG_COL, serialized);
  }

  /**
   * Print out the global privileges.
   * @return string containing the global privileges
   * @throws IOException
   * @throws TException
   */
  String printGlobalPrivs() throws IOException, TException {
    PrincipalPrivilegeSet pps = getGlobalPrivs();
    if (pps == null) return "No global privileges";
    else return dumpThriftObject(pps);
  }

  /**********************************************************************************************
   * Partition related methods
   *********************************************************************************************/

  /**
   * Fetch one partition
   * @param dbName database table is in
   * @param tableName table partition is in
   * @param partVals list of values that specify the partition, given in the same order as the
   *                 columns they belong to
   * @return The partition object, or null if there is no such partition
   * @throws IOException
   */
  Partition getPartition(String dbName, String tableName, List<String> partVals)
      throws IOException {
    return getPartition(dbName, tableName, partVals, true);
  }

  /**
   * Get a set of specific partitions. This cannot be used to do a scan; each partition must be
   * completely specified. This does not use the partition cache.
   * @param dbName database table is in
   * @param tableName table partitions are in
   * @param partValLists list of list of values, each list should uniquely identify one partition
   * @return a list of partition objects.
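   * <p>A hedged usage sketch (the database, table, column type, and values below are
   * hypothetical; exception handling is omitted):
   * <pre>{@code
   *   // Fetch (ds='2016-01-01') and (ds='2016-01-02') from a table partitioned by a single
   *   // string column.
   *   List<Partition> parts = getPartitions("default", "web_logs",
   *       Arrays.asList("string"),
   *       Arrays.asList(Arrays.asList("2016-01-01"), Arrays.asList("2016-01-02")));
   * }</pre>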
   * @throws IOException
   */
  List<Partition> getPartitions(String dbName, String tableName, List<String> partTypes,
      List<List<String>> partValLists) throws IOException {
    List<Partition> parts = new ArrayList<>(partValLists.size());
    List<Get> gets = new ArrayList<>(partValLists.size());
    for (List<String> partVals : partValLists) {
      byte[] key = HBaseUtils.buildPartitionKey(dbName, tableName, partTypes, partVals);
      Get get = new Get(key);
      get.addColumn(CATALOG_CF, CATALOG_COL);
      gets.add(get);
    }
    HTableInterface htab = conn.getHBaseTable(PART_TABLE);
    Result[] results = htab.get(gets);
    for (int i = 0; i < results.length; i++) {
      HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializePartition(dbName,
          tableName, partValLists.get(i), results[i].getValue(CATALOG_CF, CATALOG_COL));
      StorageDescriptor sd = getStorageDescriptor(sdParts.sdHash);
      HBaseUtils.assembleStorageDescriptor(sd, sdParts);
      parts.add(sdParts.containingPartition);
    }
    return parts;
  }

  /**
   * Add a partition. This should only be called for new partitions. For altering existing
   * partitions this should not be called as it will blindly increment the ref counter for the
   * storage descriptor.
   * @param partition partition object to add
   * @throws IOException
   */
  void putPartition(Partition partition) throws IOException {
    byte[] hash = putStorageDescriptor(partition.getSd());
    byte[][] serialized = HBaseUtils.serializePartition(partition,
        HBaseUtils.getPartitionKeyTypes(
            getTable(partition.getDbName(), partition.getTableName()).getPartitionKeys()), hash);
    store(PART_TABLE, serialized[0], CATALOG_CF, CATALOG_COL, serialized[1]);
    partCache.put(partition.getDbName(), partition.getTableName(), partition);
  }

  /**
   * Replace an existing partition.
   * @param oldPart partition to be replaced
   * @param newPart partition to replace it with
   * @throws IOException
   */
  void replacePartition(Partition oldPart, Partition newPart, List<String> partTypes)
      throws IOException {
    byte[] hash;
    byte[] oldHash = HBaseUtils.hashStorageDescriptor(oldPart.getSd(), md);
    byte[] newHash = HBaseUtils.hashStorageDescriptor(newPart.getSd(), md);
    if (Arrays.equals(oldHash, newHash)) {
      hash = oldHash;
    } else {
      decrementStorageDescriptorRefCount(oldPart.getSd());
      hash = putStorageDescriptor(newPart.getSd());
    }
    byte[][] serialized = HBaseUtils.serializePartition(newPart,
        HBaseUtils.getPartitionKeyTypes(
            getTable(newPart.getDbName(), newPart.getTableName()).getPartitionKeys()), hash);
    store(PART_TABLE, serialized[0], CATALOG_CF, CATALOG_COL, serialized[1]);
    partCache.put(newPart.getDbName(), newPart.getTableName(), newPart);
    if (!oldPart.getTableName().equals(newPart.getTableName())) {
      deletePartition(oldPart.getDbName(), oldPart.getTableName(), partTypes, oldPart.getValues());
    }
  }

  /**
   * Add a group of partitions. This should only be used when all partitions are new. It
   * blindly increments the ref count on the storage descriptor.
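   * <p>A hedged sketch of the intended call pattern (the helper that builds the Partition
   * objects is hypothetical; it stands in for whatever constructs genuinely new partitions):
   * <pre>{@code
   *   List<Partition> newParts = buildNewPartitions();  // hypothetical helper
   *   putPartitions(newParts);   // one HBase Put per partition, followed by a flush
   *   // For partitions that already exist, use replacePartitions() instead, since it
   *   // adjusts storage descriptor reference counts rather than blindly incrementing them.
   * }</pre>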
* @param partitions list of partitions to add * @throws IOException */ void putPartitions(List<Partition> partitions) throws IOException { List<Put> puts = new ArrayList<>(partitions.size()); for (Partition partition : partitions) { byte[] hash = putStorageDescriptor(partition.getSd()); List<String> partTypes = HBaseUtils.getPartitionKeyTypes( getTable(partition.getDbName(), partition.getTableName()).getPartitionKeys()); byte[][] serialized = HBaseUtils.serializePartition(partition, partTypes, hash); Put p = new Put(serialized[0]); p.add(CATALOG_CF, CATALOG_COL, serialized[1]); puts.add(p); partCache.put(partition.getDbName(), partition.getTableName(), partition); } HTableInterface htab = conn.getHBaseTable(PART_TABLE); htab.put(puts); conn.flush(htab); } void replacePartitions(List<Partition> oldParts, List<Partition> newParts, List<String> oldPartTypes) throws IOException { if (oldParts.size() != newParts.size()) { throw new RuntimeException("Number of old and new partitions must match."); } List<Put> puts = new ArrayList<>(newParts.size()); for (int i = 0; i < newParts.size(); i++) { byte[] hash; byte[] oldHash = HBaseUtils.hashStorageDescriptor(oldParts.get(i).getSd(), md); byte[] newHash = HBaseUtils.hashStorageDescriptor(newParts.get(i).getSd(), md); if (Arrays.equals(oldHash, newHash)) { hash = oldHash; } else { decrementStorageDescriptorRefCount(oldParts.get(i).getSd()); hash = putStorageDescriptor(newParts.get(i).getSd()); } Partition newPart = newParts.get(i); byte[][] serialized = HBaseUtils.serializePartition(newPart, HBaseUtils.getPartitionKeyTypes(getTable(newPart.getDbName(), newPart.getTableName()).getPartitionKeys()), hash); Put p = new Put(serialized[0]); p.add(CATALOG_CF, CATALOG_COL, serialized[1]); puts.add(p); partCache.put(newParts.get(i).getDbName(), newParts.get(i).getTableName(), newParts.get(i)); if (!newParts.get(i).getTableName().equals(oldParts.get(i).getTableName())) { // We need to remove the old record as well. deletePartition(oldParts.get(i).getDbName(), oldParts.get(i).getTableName(), oldPartTypes, oldParts.get(i).getValues(), false); } } HTableInterface htab = conn.getHBaseTable(PART_TABLE); htab.put(puts); conn.flush(htab); } /** * Find all the partitions in a table. * @param dbName name of the database the table is in * @param tableName table name * @param maxPartitions max partitions to fetch. If negative all partitions will be returned. * @return List of partitions that match the criteria. * @throws IOException */ List<Partition> scanPartitionsInTable(String dbName, String tableName, int maxPartitions) throws IOException { if (maxPartitions < 0) maxPartitions = Integer.MAX_VALUE; Collection<Partition> cached = partCache.getAllForTable(dbName, tableName); if (cached != null) { return maxPartitions < cached.size() ? new ArrayList<>(cached).subList(0, maxPartitions) : new ArrayList<>(cached); } byte[] keyPrefix = HBaseUtils.buildPartitionKey(dbName, tableName, new ArrayList<String>(), new ArrayList<String>(), false); List<Partition> parts = scanPartitionsWithFilter(dbName, tableName, keyPrefix, HBaseUtils.getEndPrefix(keyPrefix), -1, null); partCache.put(dbName, tableName, parts, true); return maxPartitions < parts.size() ? parts.subList(0, maxPartitions) : parts; } /** * Scan partitions based on partial key information. * @param dbName name of database, required * @param tableName name of table, required * @param partVals partial specification of values. Any values that are unknown can instead be * a '*'. 
For example, if a table had two partition columns date * and region (in that order), and partitions ('today', 'na'), ('today', 'eu'), * ('tomorrow', 'na'), ('tomorrow', 'eu') then passing ['today', '*'] would return * ('today', 'na') and ('today', 'eu') while passing ['*', 'eu'] would return * ('today', 'eu') and ('tomorrow', 'eu'). Also the list can terminate early, * which will be the equivalent of adding '*' for all non-included values. * I.e. ['today'] is the same as ['today', '*']. * @param maxPartitions Maximum number of entries to return. * @return list of partitions that match the specified information * @throws IOException * @throws org.apache.hadoop.hive.metastore.api.NoSuchObjectException if the table containing * the partitions can't be found. */ List<Partition> scanPartitions(String dbName, String tableName, List<String> partVals, int maxPartitions) throws IOException, NoSuchObjectException { PartitionScanInfo psi = scanPartitionsInternal(dbName, tableName, partVals, maxPartitions); List<Partition> parts = scanPartitionsWithFilter(dbName, tableName, psi.keyPrefix, psi.endKeyPrefix, maxPartitions, psi.filter); partCache.put(dbName, tableName, parts, false); return parts; } List<Partition> scanPartitions(String dbName, String tableName, byte[] keyStart, byte[] keyEnd, Filter filter, int maxPartitions) throws IOException, NoSuchObjectException { byte[] startRow = keyStart; byte[] endRow; if (keyEnd == null || keyEnd.length == 0) { // stop when current db+table entries are over endRow = HBaseUtils.getEndPrefix(startRow); } else { endRow = keyEnd; } if (LOG.isDebugEnabled()) { LOG.debug("Scanning partitions with start row <" + new String(startRow) + "> and end row <" + new String(endRow) + ">"); } return scanPartitionsWithFilter(dbName, tableName, startRow, endRow, maxPartitions, filter); } /** * Delete a partition * @param dbName database name that table is in * @param tableName table partition is in * @param partVals partition values that define this partition, in the same order as the * partition columns they are values for * @throws IOException */ void deletePartition(String dbName, String tableName, List<String> partTypes, List<String> partVals) throws IOException { deletePartition(dbName, tableName, partTypes, partVals, true); } /** * Print out a partition. * @param partKey The key for the partition. This must include dbname.tablename._partkeys_ * where _partkeys_ is a dot separated list of partition values in the proper * order. * @return string containing the partition * @throws IOException * @throws TException */ String printPartition(String partKey) throws IOException, TException { // First figure out the table and fetch it String[] partKeyParts = partKey.split(HBaseUtils.KEY_SEPARATOR_STR); if (partKeyParts.length < 3) return noSuch(partKey, "partition"); Table table = getTable(partKeyParts[0], partKeyParts[1]); if (table == null) return noSuch(partKey, "partition"); byte[] key = HBaseUtils.buildPartitionKey(partKeyParts[0], partKeyParts[1], HBaseUtils.getPartitionKeyTypes(table.getPartitionKeys()), Arrays.asList(Arrays.copyOfRange(partKeyParts, 2, partKeyParts.length))); @SuppressWarnings("deprecation") HTableInterface htab = conn.getHBaseTable(PART_TABLE); Get g = new Get(key); g.addColumn(CATALOG_CF, CATALOG_COL); g.addFamily(STATS_CF); Result result = htab.get(g); if (result.isEmpty()) return noSuch(partKey, "partition"); return printOnePartition(result); } /** * Print partitions * @param partKey a partial partition key. 
   *                This must match the beginnings of the partition key. It can be just
   *                dbname.tablename, or dbname.tablename.pval... where the pvals are the
   *                partition values in order. They must be in the correct order and they must
   *                be literal values (no regular expressions).
   * @return partitions as strings
   * @throws IOException
   * @throws TException
   */
  List<String> printPartitions(String partKey) throws IOException, TException {
    // First figure out the table and fetch it
    // Split on dot here rather than the standard separator because this will be passed in as a
    // regex, even though we aren't fully supporting regexes.
    String[] partKeyParts = partKey.split("\\.");
    if (partKeyParts.length < 2) return noMatch(partKey, "partition");
    List<String> partVals = partKeyParts.length == 2 ? Arrays.asList("*") :
        Arrays.asList(Arrays.copyOfRange(partKeyParts, 2, partKeyParts.length));
    PartitionScanInfo psi;
    try {
      psi = scanPartitionsInternal(partKeyParts[0], partKeyParts[1], partVals, -1);
    } catch (NoSuchObjectException e) {
      return noMatch(partKey, "partition");
    }
    @SuppressWarnings("deprecation")
    HTableInterface htab = conn.getHBaseTable(PART_TABLE);
    Scan scan = new Scan();
    scan.addColumn(CATALOG_CF, CATALOG_COL);
    scan.addFamily(STATS_CF);
    scan.setStartRow(psi.keyPrefix);
    scan.setStopRow(psi.endKeyPrefix);
    scan.setFilter(psi.filter);
    Iterator<Result> iter = htab.getScanner(scan).iterator();
    if (!iter.hasNext()) return noMatch(partKey, "partition");
    List<String> lines = new ArrayList<>();
    while (iter.hasNext()) {
      lines.add(printOnePartition(iter.next()));
    }
    return lines;
  }

  int getPartitionCount() throws IOException {
    Filter fil = new FirstKeyOnlyFilter();
    Iterator<Result> iter = scan(PART_TABLE, fil);
    return Iterators.size(iter);
  }

  private String printOnePartition(Result result) throws IOException, TException {
    byte[] key = result.getRow();
    HBaseUtils.StorageDescriptorParts sdParts =
        HBaseUtils.deserializePartition(key, result.getValue(CATALOG_CF, CATALOG_COL), this);
    StringBuilder builder = new StringBuilder();
    builder.append(dumpThriftObject(sdParts.containingPartition))
        .append(" sdHash: ")
        .append(Base64.encodeBase64URLSafeString(sdParts.sdHash))
        .append(" stats:");
    NavigableMap<byte[], byte[]> statsCols = result.getFamilyMap(STATS_CF);
    for (Map.Entry<byte[], byte[]> statsCol : statsCols.entrySet()) {
      builder.append(" column ")
          .append(new String(statsCol.getKey(), HBaseUtils.ENCODING))
          .append(": ");
      ColumnStatistics pcs = buildColStats(key, false);
      ColumnStatisticsObj cso = HBaseUtils.deserializeStatsForOneColumn(pcs, statsCol.getValue());
      builder.append(dumpThriftObject(cso));
    }
    return builder.toString();
  }

  private void deletePartition(String dbName, String tableName, List<String> partTypes,
      List<String> partVals, boolean decrementRefCnt) throws IOException {
    // Find the partition so I can get the storage descriptor and drop it
    partCache.remove(dbName, tableName, partVals);
    if (decrementRefCnt) {
      Partition p = getPartition(dbName, tableName, partVals, false);
      decrementStorageDescriptorRefCount(p.getSd());
    }
    byte[] key = HBaseUtils.buildPartitionKey(dbName, tableName, partTypes, partVals);
    delete(PART_TABLE, key, null, null);
  }

  private Partition getPartition(String dbName, String tableName, List<String> partVals,
      boolean populateCache) throws IOException {
    Partition cached = partCache.get(dbName, tableName, partVals);
    if (cached != null) return cached;
    byte[] key = HBaseUtils.buildPartitionKey(dbName, tableName,
        HBaseUtils.getPartitionKeyTypes(getTable(dbName, tableName).getPartitionKeys()), partVals);
    byte[] serialized =
read(PART_TABLE, key, CATALOG_CF, CATALOG_COL); if (serialized == null) return null; HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializePartition(dbName, tableName, partVals, serialized); StorageDescriptor sd = getStorageDescriptor(sdParts.sdHash); HBaseUtils.assembleStorageDescriptor(sd, sdParts); if (populateCache) partCache.put(dbName, tableName, sdParts.containingPartition); return sdParts.containingPartition; } private static class PartitionScanInfo { final String dbName; final String tableName; final byte[] keyPrefix; final byte[] endKeyPrefix; final int maxPartitions; final Filter filter; PartitionScanInfo(String d, String t, byte[] k, byte[] e, int m, Filter f) { dbName = d; tableName = t; keyPrefix = k; endKeyPrefix = e; maxPartitions = m; filter = f; } @Override public String toString() { return new StringBuilder("dbName:") .append(dbName) .append(" tableName:") .append(tableName) .append(" keyPrefix:") .append(Base64.encodeBase64URLSafeString(keyPrefix)) .append(" endKeyPrefix:") .append(Base64.encodeBase64URLSafeString(endKeyPrefix)) .append(" maxPartitions:") .append(maxPartitions) .append(" filter:") .append(filter.toString()) .toString(); } } private PartitionScanInfo scanPartitionsInternal(String dbName, String tableName, List<String> partVals, int maxPartitions) throws IOException, NoSuchObjectException { // First, build as much of the key as we can so that we make the scan as tight as possible. List<String> keyElements = new ArrayList<>(); keyElements.add(dbName); keyElements.add(tableName); int firstStar = -1; for (int i = 0; i < partVals.size(); i++) { if ("*".equals(partVals.get(i))) { firstStar = i; break; } else { // empty string equals to null partition, // means star if (partVals.get(i).equals("")) { break; } else { keyElements.add(partVals.get(i)); } } } byte[] keyPrefix; // We need to fetch the table to determine if the user fully specified the partitions or // not, as it affects how we build the key. Table table = getTable(dbName, tableName); if (table == null) { throw new NoSuchObjectException("Unable to find table " + dbName + "." 
+ tableName); } keyPrefix = HBaseUtils.buildPartitionKey(dbName, tableName, HBaseUtils.getPartitionKeyTypes(table.getPartitionKeys().subList(0, keyElements.size()-2)), keyElements.subList(2, keyElements.size())); // Now, build a filter out of the remaining keys List<PartitionKeyComparator.Range> ranges = new ArrayList<PartitionKeyComparator.Range>(); List<Operator> ops = new ArrayList<Operator>(); if (!(partVals.size() == table.getPartitionKeys().size() && firstStar == -1)) { for (int i = Math.max(0, firstStar); i < table.getPartitionKeys().size() && i < partVals.size(); i++) { if ("*".equals(partVals.get(i))) { PartitionKeyComparator.Operator op = new PartitionKeyComparator.Operator( PartitionKeyComparator.Operator.Type.LIKE, table.getPartitionKeys().get(i).getName(), ".*"); ops.add(op); } else { PartitionKeyComparator.Range range = new PartitionKeyComparator.Range( table.getPartitionKeys().get(i).getName(), new PartitionKeyComparator.Mark(partVals.get(i), true), new PartitionKeyComparator.Mark(partVals.get(i), true)); ranges.add(range); } } } Filter filter = null; if (!ranges.isEmpty() || !ops.isEmpty()) { filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new PartitionKeyComparator( StringUtils.join(HBaseUtils.getPartitionNames(table.getPartitionKeys()), ","), StringUtils.join(HBaseUtils.getPartitionKeyTypes(table.getPartitionKeys()), ","), ranges, ops)); } if (LOG.isDebugEnabled()) { LOG.debug("Scanning partitions with prefix <" + new String(keyPrefix) + "> and filter <" + filter + ">"); } return new PartitionScanInfo(dbName, tableName, keyPrefix, HBaseUtils.getEndPrefix(keyPrefix), maxPartitions, filter); } private List<Partition> scanPartitionsWithFilter(String dbName, String tableName, byte[] startRow, byte [] endRow, int maxResults, Filter filter) throws IOException { Iterator<Result> iter = scan(PART_TABLE, startRow, endRow, CATALOG_CF, CATALOG_COL, filter); List<FieldSchema> tablePartitions = getTable(dbName, tableName).getPartitionKeys(); List<Partition> parts = new ArrayList<>(); int numToFetch = maxResults < 0 ? Integer.MAX_VALUE : maxResults; for (int i = 0; i < numToFetch && iter.hasNext(); i++) { Result result = iter.next(); HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializePartition(dbName, tableName, tablePartitions, result.getRow(), result.getValue(CATALOG_CF, CATALOG_COL), conf); StorageDescriptor sd = getStorageDescriptor(sdParts.sdHash); HBaseUtils.assembleStorageDescriptor(sd, sdParts); parts.add(sdParts.containingPartition); } return parts; } /********************************************************************************************** * Role related methods *********************************************************************************************/ /** * Fetch the list of all roles for a user * @param userName name of the user * @return the list of all roles this user participates in * @throws IOException */ List<String> getUserRoles(String userName) throws IOException { byte[] key = HBaseUtils.buildKey(userName); byte[] serialized = read(USER_TO_ROLE_TABLE, key, CATALOG_CF, CATALOG_COL); if (serialized == null) return null; return HBaseUtils.deserializeRoleList(serialized); } /** * Find all roles directly participated in by a given principal. This builds the role cache * because it assumes that subsequent calls may be made to find roles participated in indirectly. * @param name username or role name * @param type user or role * @return map of role name to grant info for all roles directly participated in. 
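   * <p>A hedged usage sketch (the user name is hypothetical; exception handling omitted):
   * <pre>{@code
   *   List<Role> direct = getPrincipalDirectRoles("fred", PrincipalType.USER);
   *   // Each returned Role was granted to "fred" directly, not via another role.
   * }</pre>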
   */
  List<Role> getPrincipalDirectRoles(String name, PrincipalType type) throws IOException {
    buildRoleCache();
    Set<String> rolesFound = new HashSet<>();
    for (Map.Entry<String, HbaseMetastoreProto.RoleGrantInfoList> e : roleCache.entrySet()) {
      for (HbaseMetastoreProto.RoleGrantInfo giw : e.getValue().getGrantInfoList()) {
        if (HBaseUtils.convertPrincipalTypes(giw.getPrincipalType()) == type &&
            giw.getPrincipalName().equals(name)) {
          rolesFound.add(e.getKey());
          break;
        }
      }
    }
    List<Role> directRoles = new ArrayList<>(rolesFound.size());
    List<Get> gets = new ArrayList<>();
    HTableInterface htab = conn.getHBaseTable(ROLE_TABLE);
    for (String roleFound : rolesFound) {
      byte[] key = HBaseUtils.buildKey(roleFound);
      Get g = new Get(key);
      g.addColumn(CATALOG_CF, CATALOG_COL);
      gets.add(g);
    }
    Result[] results = htab.get(gets);
    for (int i = 0; i < results.length; i++) {
      byte[] serialized = results[i].getValue(CATALOG_CF, CATALOG_COL);
      if (serialized != null) {
        directRoles.add(HBaseUtils.deserializeRole(results[i].getRow(), serialized));
      }
    }
    return directRoles;
  }

  /**
   * Fetch all roles and users included directly in a given role.
   * @param roleName name of the role
   * @return a list of all roles and users directly included in this role
   * @throws IOException
   */
  HbaseMetastoreProto.RoleGrantInfoList getRolePrincipals(String roleName)
      throws IOException, NoSuchObjectException {
    HbaseMetastoreProto.RoleGrantInfoList rolePrincipals = roleCache.get(roleName);
    if (rolePrincipals != null) return rolePrincipals;
    byte[] key = HBaseUtils.buildKey(roleName);
    byte[] serialized = read(ROLE_TABLE, key, CATALOG_CF, ROLES_COL);
    if (serialized == null) return null;
    rolePrincipals = HbaseMetastoreProto.RoleGrantInfoList.parseFrom(serialized);
    roleCache.put(roleName, rolePrincipals);
    return rolePrincipals;
  }

  /**
   * Given a role, find all users who either directly or indirectly participate in this role.
   * This is expensive; it should be used sparingly. It scans the entire userToRole table and
   * does a linear search on each entry.
   * @param roleName name of the role
   * @return set of all users in the role
   * @throws IOException
   */
  Set<String> findAllUsersInRole(String roleName) throws IOException {
    // Walk the userToRole table and collect every user that matches this role.
    Set<String> users = new HashSet<>();
    Iterator<Result> iter = scan(USER_TO_ROLE_TABLE, CATALOG_CF, CATALOG_COL);
    while (iter.hasNext()) {
      Result result = iter.next();
      List<String> roleList =
          HBaseUtils.deserializeRoleList(result.getValue(CATALOG_CF, CATALOG_COL));
      for (String rn : roleList) {
        if (rn.equals(roleName)) {
          users.add(new String(result.getRow(), HBaseUtils.ENCODING));
          break;
        }
      }
    }
    return users;
  }

  /**
   * Add a principal to a role.
   * @param roleName name of the role to add principal to
   * @param grantInfo grant information for this principal.
   * @throws java.io.IOException
   * @throws NoSuchObjectException
   */
  void addPrincipalToRole(String roleName, HbaseMetastoreProto.RoleGrantInfo grantInfo)
      throws IOException, NoSuchObjectException {
    HbaseMetastoreProto.RoleGrantInfoList proto = getRolePrincipals(roleName);
    List<HbaseMetastoreProto.RoleGrantInfo> rolePrincipals = new ArrayList<>();
    if (proto != null) {
      rolePrincipals.addAll(proto.getGrantInfoList());
    }
    rolePrincipals.add(grantInfo);
    proto = HbaseMetastoreProto.RoleGrantInfoList.newBuilder()
        .addAllGrantInfo(rolePrincipals)
        .build();
    byte[] key = HBaseUtils.buildKey(roleName);
    store(ROLE_TABLE, key, CATALOG_CF, ROLES_COL, proto.toByteArray());
    roleCache.put(roleName, proto);
  }

  /**
   * Drop a principal from a role.
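   * <p>A hedged sketch of the two modes (role and user names are hypothetical):
   * <pre>{@code
   *   // Remove only the grant option; "fred" remains a member of the role.
   *   dropPrincipalFromRole("admins", "fred", PrincipalType.USER, true);
   *   // Remove "fred" from the role entirely.
   *   dropPrincipalFromRole("admins", "fred", PrincipalType.USER, false);
   * }</pre>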
* @param roleName Name of the role to drop the principal from * @param principalName name of the principal to drop from the role * @param type user or role * @param grantOnly if this is true, just remove the grant option, don't actually remove the * user from the role. * @throws NoSuchObjectException * @throws IOException */ void dropPrincipalFromRole(String roleName, String principalName, PrincipalType type, boolean grantOnly) throws NoSuchObjectException, IOException { HbaseMetastoreProto.RoleGrantInfoList proto = getRolePrincipals(roleName); if (proto == null) return; List<HbaseMetastoreProto.RoleGrantInfo> rolePrincipals = new ArrayList<>(); rolePrincipals.addAll(proto.getGrantInfoList()); for (int i = 0; i < rolePrincipals.size(); i++) { if (HBaseUtils.convertPrincipalTypes(rolePrincipals.get(i).getPrincipalType()) == type && rolePrincipals.get(i).getPrincipalName().equals(principalName)) { if (grantOnly) { rolePrincipals.set(i, HbaseMetastoreProto.RoleGrantInfo.newBuilder(rolePrincipals.get(i)) .setGrantOption(false) .build()); } else { rolePrincipals.remove(i); } break; } } byte[] key = HBaseUtils.buildKey(roleName); proto = HbaseMetastoreProto.RoleGrantInfoList.newBuilder() .addAllGrantInfo(rolePrincipals) .build(); store(ROLE_TABLE, key, CATALOG_CF, ROLES_COL, proto.toByteArray()); roleCache.put(roleName, proto); } /** * Rebuild the row for a given user in the USER_TO_ROLE table. This is expensive. It * should be called as infrequently as possible. * @param userName name of the user * @throws IOException */ void buildRoleMapForUser(String userName) throws IOException, NoSuchObjectException { // This is mega ugly. Hopefully we don't have to do this too often. // First, scan the role table and put it all in memory buildRoleCache(); LOG.debug("Building role map for " + userName); // Second, find every role the user participates in directly. Set<String> rolesToAdd = new HashSet<>(); Set<String> rolesToCheckNext = new HashSet<>(); for (Map.Entry<String, HbaseMetastoreProto.RoleGrantInfoList> e : roleCache.entrySet()) { for (HbaseMetastoreProto.RoleGrantInfo grantInfo : e.getValue().getGrantInfoList()) { if (HBaseUtils.convertPrincipalTypes(grantInfo.getPrincipalType()) == PrincipalType.USER && userName .equals(grantInfo.getPrincipalName())) { rolesToAdd.add(e.getKey()); rolesToCheckNext.add(e.getKey()); LOG.debug("Adding " + e.getKey() + " to list of roles user is in directly"); break; } } } // Third, find every role the user participates in indirectly (that is, they have been // granted into role X and role Y has been granted into role X). while (rolesToCheckNext.size() > 0) { Set<String> tmpRolesToCheckNext = new HashSet<>(); for (String roleName : rolesToCheckNext) { HbaseMetastoreProto.RoleGrantInfoList grantInfos = roleCache.get(roleName); if (grantInfos == null) continue; // happens when a role contains no grants for (HbaseMetastoreProto.RoleGrantInfo grantInfo : grantInfos.getGrantInfoList()) { if (HBaseUtils.convertPrincipalTypes(grantInfo.getPrincipalType()) == PrincipalType.ROLE && rolesToAdd.add(grantInfo.getPrincipalName())) { tmpRolesToCheckNext.add(grantInfo.getPrincipalName()); LOG.debug("Adding " + grantInfo.getPrincipalName() + " to list of roles user is in indirectly"); } } } rolesToCheckNext = tmpRolesToCheckNext; } byte[] key = HBaseUtils.buildKey(userName); byte[] serialized = HBaseUtils.serializeRoleList(new ArrayList<>(rolesToAdd)); store(USER_TO_ROLE_TABLE, key, CATALOG_CF, CATALOG_COL, serialized); } /** * Remove all of the grants for a role. This is not cheap. 
* @param roleName Role to remove from all other roles and grants * @throws IOException */ void removeRoleGrants(String roleName) throws IOException { buildRoleCache(); List<Put> puts = new ArrayList<>(); // First, walk the role table and remove any references to this role for (Map.Entry<String, HbaseMetastoreProto.RoleGrantInfoList> e : roleCache.entrySet()) { boolean madeAChange = false; List<HbaseMetastoreProto.RoleGrantInfo> rgil = new ArrayList<>(); rgil.addAll(e.getValue().getGrantInfoList()); for (int i = 0; i < rgil.size(); i++) { if (HBaseUtils.convertPrincipalTypes(rgil.get(i).getPrincipalType()) == PrincipalType.ROLE && rgil.get(i).getPrincipalName().equals(roleName)) { rgil.remove(i); madeAChange = true; break; } } if (madeAChange) { Put put = new Put(HBaseUtils.buildKey(e.getKey())); HbaseMetastoreProto.RoleGrantInfoList proto = HbaseMetastoreProto.RoleGrantInfoList.newBuilder() .addAllGrantInfo(rgil) .build(); put.add(CATALOG_CF, ROLES_COL, proto.toByteArray()); puts.add(put); roleCache.put(e.getKey(), proto); } } if (puts.size() > 0) { HTableInterface htab = conn.getHBaseTable(ROLE_TABLE); htab.put(puts); conn.flush(htab); } // Remove any global privileges held by this role PrincipalPrivilegeSet global = getGlobalPrivs(); if (global != null && global.getRolePrivileges() != null && global.getRolePrivileges().remove(roleName) != null) { putGlobalPrivs(global); } // Now, walk the db table puts.clear(); List<Database> dbs = scanDatabases(null); if (dbs == null) dbs = new ArrayList<>(); // rare, but can happen for (Database db : dbs) { if (db.getPrivileges() != null && db.getPrivileges().getRolePrivileges() != null && db.getPrivileges().getRolePrivileges().remove(roleName) != null) { byte[][] serialized = HBaseUtils.serializeDatabase(db); Put put = new Put(serialized[0]); put.add(CATALOG_CF, CATALOG_COL, serialized[1]); puts.add(put); } } if (puts.size() > 0) { HTableInterface htab = conn.getHBaseTable(DB_TABLE); htab.put(puts); conn.flush(htab); } // Finally, walk the table table puts.clear(); for (Database db : dbs) { List<Table> tables = scanTables(db.getName(), null); if (tables != null) { for (Table table : tables) { if (table.getPrivileges() != null && table.getPrivileges().getRolePrivileges() != null && table.getPrivileges().getRolePrivileges().remove(roleName) != null) { byte[][] serialized = HBaseUtils.serializeTable(table, HBaseUtils.hashStorageDescriptor(table.getSd(), md)); Put put = new Put(serialized[0]); put.add(CATALOG_CF, CATALOG_COL, serialized[1]); puts.add(put); } } } } if (puts.size() > 0) { HTableInterface htab = conn.getHBaseTable(TABLE_TABLE); htab.put(puts); conn.flush(htab); } } /** * Fetch a role * @param roleName name of the role * @return role object, or null if no such role * @throws IOException */ Role getRole(String roleName) throws IOException { byte[] key = HBaseUtils.buildKey(roleName); byte[] serialized = read(ROLE_TABLE, key, CATALOG_CF, CATALOG_COL); if (serialized == null) return null; return HBaseUtils.deserializeRole(roleName, serialized); } /** * Get a list of roles. * @return list of all known roles. 
* @throws IOException */ List<Role> scanRoles() throws IOException { return scanRoles(null); } /** * Add a new role * @param role role object * @throws IOException */ void putRole(Role role) throws IOException { byte[][] serialized = HBaseUtils.serializeRole(role); store(ROLE_TABLE, serialized[0], CATALOG_CF, CATALOG_COL, serialized[1]); } /** * Drop a role * @param roleName name of role to drop * @throws IOException */ void deleteRole(String roleName) throws IOException { byte[] key = HBaseUtils.buildKey(roleName); delete(ROLE_TABLE, key, null, null); roleCache.remove(roleName); } String printRolesForUser(String userName) throws IOException { List<String> roles = getUserRoles(userName); if (roles == null || roles.size() == 0) return noSuch(userName, "user"); return org.apache.commons.lang.StringUtils.join(roles, ','); } List<String> printRolesForUsers(String regex) throws IOException { Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex)); Iterator<Result> iter = scan(USER_TO_ROLE_TABLE, null, null, CATALOG_CF, CATALOG_COL, filter); List<String> lines = new ArrayList<>(); while (iter.hasNext()) { Result result = iter.next(); lines.add(new String(result.getRow(), HBaseUtils.ENCODING) + ": " + org.apache.commons.lang.StringUtils.join( HBaseUtils.deserializeRoleList(result.getValue(CATALOG_CF, CATALOG_COL)), ',')); } if (lines.size() == 0) lines = noMatch(regex, "user"); return lines; } /** * Print out a role * @param name name of role to print * @return string printout of role */ String printRole(String name) throws IOException, TException { Role role = getRole(name); if (role == null) return noSuch(name, "role"); else return dumpThriftObject(role); } /** * Print out roles. * @param regex regular to use to search for roles * @return string printout of roles * @throws IOException * @throws TException */ List<String> printRoles(String regex) throws IOException, TException { List<Role> roles = scanRoles(regex); if (roles.size() == 0) { return noMatch(regex, "role"); } else { List<String> lines = new ArrayList<>(); for (Role role : roles) lines.add(dumpThriftObject(role)); return lines; } } private List<Role> scanRoles(String regex) throws IOException { Filter filter = null; if (regex != null) { filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex)); } Iterator<Result> iter = scan(ROLE_TABLE, null, null, CATALOG_CF, CATALOG_COL, filter); List<Role> roles = new ArrayList<>(); while (iter.hasNext()) { Result result = iter.next(); roles.add(HBaseUtils.deserializeRole(result.getRow(), result.getValue(CATALOG_CF, CATALOG_COL))); } return roles; } private void buildRoleCache() throws IOException { if (!entireRoleTableInCache) { Iterator<Result> roles = scan(ROLE_TABLE, CATALOG_CF, ROLES_COL); while (roles.hasNext()) { Result res = roles.next(); String roleName = new String(res.getRow(), HBaseUtils.ENCODING); HbaseMetastoreProto.RoleGrantInfoList grantInfos = HbaseMetastoreProto.RoleGrantInfoList.parseFrom(res.getValue(CATALOG_CF, ROLES_COL)); roleCache.put(roleName, grantInfos); } entireRoleTableInCache = true; } } /********************************************************************************************** * Table related methods *********************************************************************************************/ /** * Fetch a table object * @param dbName database the table is in * @param tableName table name * @return Table object, or null if no such table * @throws IOException */ Table getTable(String dbName, 
String tableName) throws IOException { return getTable(dbName, tableName, true); } /** * Fetch a list of table objects. * @param dbName Database that all fetched tables are in * @param tableNames list of table names * @return list of tables, in the same order as the provided names. * @throws IOException */ List<Table> getTables(String dbName, List<String> tableNames) throws IOException { // I could implement getTable in terms of this method. But it is such a core function // that I don't want to slow it down for the much less common fetching of multiple tables. List<Table> results = new ArrayList<>(tableNames.size()); ObjectPair<String, String>[] hashKeys = new ObjectPair[tableNames.size()]; boolean atLeastOneMissing = false; for (int i = 0; i < tableNames.size(); i++) { hashKeys[i] = new ObjectPair<>(dbName, tableNames.get(i)); // The result may be null, but we still want to add it so that we have a slot in the list // for it. results.add(tableCache.get(hashKeys[i])); if (results.get(i) == null) atLeastOneMissing = true; } if (!atLeastOneMissing) return results; // Now build a single get that will fetch the remaining tables List<Get> gets = new ArrayList<>(); HTableInterface htab = conn.getHBaseTable(TABLE_TABLE); for (int i = 0; i < tableNames.size(); i++) { if (results.get(i) != null) continue; byte[] key = HBaseUtils.buildKey(dbName, tableNames.get(i)); Get g = new Get(key); g.addColumn(CATALOG_CF, CATALOG_COL); gets.add(g); } Result[] res = htab.get(gets); for (int i = 0, nextGet = 0; i < tableNames.size(); i++) { if (results.get(i) != null) continue; byte[] serialized = res[nextGet++].getValue(CATALOG_CF, CATALOG_COL); if (serialized != null) { HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializeTable(dbName, tableNames.get(i), serialized); StorageDescriptor sd = getStorageDescriptor(sdParts.sdHash); HBaseUtils.assembleStorageDescriptor(sd, sdParts); tableCache.put(hashKeys[i], sdParts.containingTable); results.set(i, sdParts.containingTable); } } return results; } /** * Get a list of tables. * @param dbName Database these tables are in * @param regex Regular expression to use in searching for table names. It is expected to * be a Java regular expression. If it is null then all tables in the indicated * database will be returned. * @return list of tables matching the regular expression. * @throws IOException */ List<Table> scanTables(String dbName, String regex) throws IOException { // There's no way to know whether all the tables we are looking for are // in the cache, so we would need to scan one way or another. Thus there's no value in hitting // the cache for this function. byte[] keyPrefix = null; if (dbName != null) { keyPrefix = HBaseUtils.buildKeyWithTrailingSeparator(dbName); } Filter filter = null; if (regex != null) { filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex)); } Iterator<Result> iter = scan(TABLE_TABLE, keyPrefix, HBaseUtils.getEndPrefix(keyPrefix), CATALOG_CF, CATALOG_COL, filter); List<Table> tables = new ArrayList<>(); while (iter.hasNext()) { Result result = iter.next(); HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializeTable(result.getRow(), result.getValue(CATALOG_CF, CATALOG_COL)); StorageDescriptor sd = getStorageDescriptor(sdParts.sdHash); HBaseUtils.assembleStorageDescriptor(sd, sdParts); tables.add(sdParts.containingTable); } return tables; } /** * Put a table object. 
This should only be called when the table is new (create table) as it * will blindly add/increment the storage descriptor. If you are altering an existing table * call {@link #replaceTable} instead. * @param table table object * @throws IOException */ void putTable(Table table) throws IOException { byte[] hash = putStorageDescriptor(table.getSd()); byte[][] serialized = HBaseUtils.serializeTable(table, hash); store(TABLE_TABLE, serialized[0], CATALOG_CF, CATALOG_COL, serialized[1]); tableCache.put(new ObjectPair<>(table.getDbName(), table.getTableName()), table); } /** * Replace an existing table. This will also compare the storage descriptors and see if the * reference count needs to be adjusted * @param oldTable old version of the table * @param newTable new version of the table */ void replaceTable(Table oldTable, Table newTable) throws IOException { byte[] hash; byte[] oldHash = HBaseUtils.hashStorageDescriptor(oldTable.getSd(), md); byte[] newHash = HBaseUtils.hashStorageDescriptor(newTable.getSd(), md); if (Arrays.equals(oldHash, newHash)) { hash = oldHash; } else { decrementStorageDescriptorRefCount(oldTable.getSd()); hash = putStorageDescriptor(newTable.getSd()); } byte[][] serialized = HBaseUtils.serializeTable(newTable, hash); store(TABLE_TABLE, serialized[0], CATALOG_CF, CATALOG_COL, serialized[1]); tableCache.put(new ObjectPair<>(newTable.getDbName(), newTable.getTableName()), newTable); if (!oldTable.getTableName().equals(newTable.getTableName())) { deleteTable(oldTable.getDbName(), oldTable.getTableName()); } } /** * Delete a table * @param dbName name of database table is in * @param tableName table to drop * @throws IOException */ void deleteTable(String dbName, String tableName) throws IOException { deleteTable(dbName, tableName, true); } /** * Print out a table. * @param name The name for the table. This must include dbname.tablename * @return string containing the table * @throws IOException * @throws TException */ String printTable(String name) throws IOException, TException { byte[] key = HBaseUtils.buildKey(name); @SuppressWarnings("deprecation") HTableInterface htab = conn.getHBaseTable(TABLE_TABLE); Get g = new Get(key); g.addColumn(CATALOG_CF, CATALOG_COL); g.addFamily(STATS_CF); Result result = htab.get(g); if (result.isEmpty()) return noSuch(name, "table"); return printOneTable(result); } /** * Print tables * @param regex to use to find the tables. Remember that dbname is in each * table name. 
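 * <p>For example, assuming the default row key layout of {@code dbname<sep>tablename} (where
 * {@code <sep>} is {@link HBaseUtils#KEY_SEPARATOR_STR}), a call such as
 * <pre>{@code
 * List<String> dumps = hrw.printTables("default" + HBaseUtils.KEY_SEPARATOR_STR + "web_.*");
 * }</pre>
 * would print every table in the {@code default} database whose name starts with {@code web_}
 * ({@code hrw} and the names here are hypothetical, for illustration only).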
* @return tables as strings * @throws IOException * @throws TException */ List<String> printTables(String regex) throws IOException, TException { Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex)); @SuppressWarnings("deprecation") HTableInterface htab = conn.getHBaseTable(TABLE_TABLE); Scan scan = new Scan(); scan.addColumn(CATALOG_CF, CATALOG_COL); scan.addFamily(STATS_CF); scan.setFilter(filter); Iterator<Result> iter = htab.getScanner(scan).iterator(); if (!iter.hasNext()) return noMatch(regex, "table"); List<String> lines = new ArrayList<>(); while (iter.hasNext()) { lines.add(printOneTable(iter.next())); } return lines; } int getTableCount() throws IOException { Filter fil = new FirstKeyOnlyFilter(); Iterator<Result> iter = scan(TABLE_TABLE, fil); return Iterators.size(iter); } private String printOneTable(Result result) throws IOException, TException { byte[] key = result.getRow(); HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializeTable(key, result.getValue(CATALOG_CF, CATALOG_COL)); StringBuilder builder = new StringBuilder(); builder.append(dumpThriftObject(sdParts.containingTable)) .append(" sdHash: ") .append(Base64.encodeBase64URLSafeString(sdParts.sdHash)) .append(" stats:"); NavigableMap<byte[], byte[]> statsCols = result.getFamilyMap(STATS_CF); for (Map.Entry<byte[], byte[]> statsCol : statsCols.entrySet()) { builder.append(" column ") .append(new String(statsCol.getKey(), HBaseUtils.ENCODING)) .append(": "); ColumnStatistics pcs = buildColStats(key, true); ColumnStatisticsObj cso = HBaseUtils.deserializeStatsForOneColumn(pcs, statsCol.getValue()); builder.append(dumpThriftObject(cso)); } // Add the primary key List<SQLPrimaryKey> pk = getPrimaryKey(sdParts.containingTable.getDbName(), sdParts.containingTable.getTableName()); if (pk != null && pk.size() > 0) { builder.append(" primary key: "); for (SQLPrimaryKey pkcol : pk) builder.append(dumpThriftObject(pkcol)); } // Add any foreign keys List<SQLForeignKey> fks = getForeignKeys(sdParts.containingTable.getDbName(), sdParts.containingTable.getTableName()); if (fks != null && fks.size() > 0) { builder.append(" foreign keys: "); for (SQLForeignKey fkcol : fks) builder.append(dumpThriftObject(fkcol)); } return builder.toString(); } private void deleteTable(String dbName, String tableName, boolean decrementRefCnt) throws IOException { tableCache.remove(new ObjectPair<>(dbName, tableName)); if (decrementRefCnt) { // Find the table so I can get the storage descriptor and drop it Table t = getTable(dbName, tableName, false); decrementStorageDescriptorRefCount(t.getSd()); } byte[] key = HBaseUtils.buildKey(dbName, tableName); delete(TABLE_TABLE, key, null, null); } private Table getTable(String dbName, String tableName, boolean populateCache) throws IOException { ObjectPair<String, String> hashKey = new ObjectPair<>(dbName, tableName); Table cached = tableCache.get(hashKey); if (cached != null) return cached; byte[] key = HBaseUtils.buildKey(dbName, tableName); byte[] serialized = read(TABLE_TABLE, key, CATALOG_CF, CATALOG_COL); if (serialized == null) return null; HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializeTable(dbName, tableName, serialized); StorageDescriptor sd = getStorageDescriptor(sdParts.sdHash); HBaseUtils.assembleStorageDescriptor(sd, sdParts); if (populateCache) tableCache.put(hashKey, sdParts.containingTable); return sdParts.containingTable; } /********************************************************************************************** * 
Index related methods *********************************************************************************************/ /** * Put an index object. This should only be called when the index is new (create index) as it * will blindly add/increment the storage descriptor. If you are altering an existing index * call {@link #replaceIndex} instead. * @param index index object * @throws IOException */ void putIndex(Index index) throws IOException { byte[] hash = putStorageDescriptor(index.getSd()); byte[][] serialized = HBaseUtils.serializeIndex(index, hash); store(INDEX_TABLE, serialized[0], CATALOG_CF, CATALOG_COL, serialized[1]); } /** * Fetch an index object * @param dbName database the table is in * @param origTableName original table name * @param indexName index name * @return Index object, or null if no such index * @throws IOException */ Index getIndex(String dbName, String origTableName, String indexName) throws IOException { byte[] key = HBaseUtils.buildKey(dbName, origTableName, indexName); byte[] serialized = read(INDEX_TABLE, key, CATALOG_CF, CATALOG_COL); if (serialized == null) return null; HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializeIndex(dbName, origTableName, indexName, serialized); StorageDescriptor sd = getStorageDescriptor(sdParts.sdHash); HBaseUtils.assembleStorageDescriptor(sd, sdParts); return sdParts.containingIndex; } /** * Delete an index * @param dbName name of database the index is in * @param origTableName table the index is built on * @param indexName index name * @throws IOException */ void deleteIndex(String dbName, String origTableName, String indexName) throws IOException { deleteIndex(dbName, origTableName, indexName, true); } void deleteIndex(String dbName, String origTableName, String indexName, boolean decrementRefCnt) throws IOException { // Find the index so I can get the storage descriptor and drop it if (decrementRefCnt) { Index index = getIndex(dbName, origTableName, indexName); decrementStorageDescriptorRefCount(index.getSd()); } byte[] key = HBaseUtils.buildKey(dbName, origTableName, indexName); delete(INDEX_TABLE, key, null, null); } /** * Get a list of indexes. * @param dbName Database these indexes are in * @param origTableName original table name * @param maxResults max indexes to fetch. If negative all indexes will be returned. * @return list of indexes of the table * @throws IOException */ List<Index> scanIndexes(String dbName, String origTableName, int maxResults) throws IOException { // There's no way to know whether all the indexes we are looking for are // in the cache, so we would need to scan one way or another. Thus there's no value in hitting // the cache for this function. byte[] keyPrefix = null; if (dbName != null) { keyPrefix = HBaseUtils.buildKeyWithTrailingSeparator(dbName, origTableName); } Iterator<Result> iter = scan(INDEX_TABLE, keyPrefix, HBaseUtils.getEndPrefix(keyPrefix), CATALOG_CF, CATALOG_COL, null); List<Index> indexes = new ArrayList<>(); int numToFetch = maxResults < 0 ? Integer.MAX_VALUE : maxResults; for (int i = 0; i < numToFetch && iter.hasNext(); i++) { Result result = iter.next(); HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializeIndex(result.getRow(), result.getValue(CATALOG_CF, CATALOG_COL)); StorageDescriptor sd = getStorageDescriptor(sdParts.sdHash); HBaseUtils.assembleStorageDescriptor(sd, sdParts); indexes.add(sdParts.containingIndex); } return indexes; } /** * Replace an existing index. 
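 * <p>A hedged illustration of the expected alter-index flow (the names are hypothetical; the
 * storage descriptor comparison it relies on is described in the rest of this comment):
 * <pre>{@code
 * Index oldIdx = hrw.getIndex("default", "web_logs", "web_logs_idx");
 * Index newIdx = oldIdx.deepCopy();    // Thrift-generated structs provide deepCopy()
 * // ... modify newIdx as needed ...
 * hrw.replaceIndex(oldIdx, newIdx);    // adjusts the sd reference count only if the sd changed
 * }</pre>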
This will also compare the storage descriptors and see if the * reference count needs to be adjusted. * @param oldIndex old version of the index * @param newIndex new version of the index */ void replaceIndex(Index oldIndex, Index newIndex) throws IOException { byte[] hash; byte[] oldHash = HBaseUtils.hashStorageDescriptor(oldIndex.getSd(), md); byte[] newHash = HBaseUtils.hashStorageDescriptor(newIndex.getSd(), md); if (Arrays.equals(oldHash, newHash)) { hash = oldHash; } else { decrementStorageDescriptorRefCount(oldIndex.getSd()); hash = putStorageDescriptor(newIndex.getSd()); } byte[][] serialized = HBaseUtils.serializeIndex(newIndex, hash); store(INDEX_TABLE, serialized[0], CATALOG_CF, CATALOG_COL, serialized[1]); if (!(oldIndex.getDbName().equals(newIndex.getDbName()) && oldIndex.getOrigTableName().equals(newIndex.getOrigTableName()) && oldIndex.getIndexName().equals(newIndex.getIndexName()))) { deleteIndex(oldIndex.getDbName(), oldIndex.getOrigTableName(), oldIndex.getIndexName(), false); } } /********************************************************************************************** * StorageDescriptor related methods *********************************************************************************************/ /** * If this storage descriptor has already been read, then return it from the cache. If not, read it, then * return it. * @param hash hash of the storage descriptor to read * @return the storage descriptor * @throws IOException */ StorageDescriptor getStorageDescriptor(byte[] hash) throws IOException { ByteArrayWrapper hashKey = new ByteArrayWrapper(hash); StorageDescriptor cached = sdCache.get(hashKey); if (cached != null) return cached; LOG.debug("Not found in cache, looking in hbase"); byte[] serialized = read(SD_TABLE, hash, CATALOG_CF, CATALOG_COL); if (serialized == null) { throw new RuntimeException("Whoa, bad! Trying to fetch a non-existent storage descriptor " + "from hash " + Base64.encodeBase64String(hash)); } StorageDescriptor sd = HBaseUtils.deserializeStorageDescriptor(serialized); sdCache.put(hashKey, sd); return sd; } /** * Lower the reference count on the storage descriptor by one. If it goes to zero, then it * will be deleted. * @param sd Storage descriptor * @throws IOException */ void decrementStorageDescriptorRefCount(StorageDescriptor sd) throws IOException { byte[] key = HBaseUtils.hashStorageDescriptor(sd, md); byte[] serializedRefCnt = read(SD_TABLE, key, CATALOG_CF, REF_COUNT_COL); if (serializedRefCnt == null) { // Someone deleted it before we got to it, no worries return; } int refCnt = Integer.parseInt(new String(serializedRefCnt, HBaseUtils.ENCODING)); HTableInterface htab = conn.getHBaseTable(SD_TABLE); if (--refCnt < 1) { Delete d = new Delete(key); // We don't use checkAndDelete here because it isn't compatible with the transaction // managers. If the transaction managers are doing their jobs then we should not need it // anyway. htab.delete(d); sdCache.remove(new ByteArrayWrapper(key)); } else { Put p = new Put(key); p.add(CATALOG_CF, REF_COUNT_COL, Integer.toString(refCnt).getBytes(HBaseUtils.ENCODING)); htab.put(p); conn.flush(htab); } } /** * Place the common parts of a storage descriptor into the cache and write the storage * descriptor out to HBase. This should only be called if you are sure that the storage * descriptor needs to be added. If you have changed a table or partition but not its storage * descriptor, do not call this method, as it will increment the reference count of the storage * descriptor. 
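 * <p>To make the sharing scheme concrete, a minimal sketch (assuming {@code sdOfPart1} and
 * {@code sdOfPart2} are byte-for-byte identical descriptors from two hypothetical partitions):
 * <pre>{@code
 * byte[] h1 = putStorageDescriptor(sdOfPart1);    // first writer: stores the sd, ref count = 1
 * byte[] h2 = putStorageDescriptor(sdOfPart2);    // identical sd: same hash, ref count bumped to 2
 * // Arrays.equals(h1, h2) is true, so both partitions point at the same HBMS_SDS row
 * decrementStorageDescriptorRefCount(sdOfPart1);  // ref count back to 1; the row is kept
 * }</pre>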
* @param storageDescriptor storage descriptor to store. * @return id of the entry in the cache, to be written in for the storage descriptor */ byte[] putStorageDescriptor(StorageDescriptor storageDescriptor) throws IOException { byte[] sd = HBaseUtils.serializeStorageDescriptor(storageDescriptor); byte[] key = HBaseUtils.hashStorageDescriptor(storageDescriptor, md); byte[] serializedRefCnt = read(SD_TABLE, key, CATALOG_CF, REF_COUNT_COL); HTableInterface htab = conn.getHBaseTable(SD_TABLE); if (serializedRefCnt == null) { // We are the first to put it in the DB Put p = new Put(key); p.add(CATALOG_CF, CATALOG_COL, sd); p.add(CATALOG_CF, REF_COUNT_COL, "1".getBytes(HBaseUtils.ENCODING)); htab.put(p); sdCache.put(new ByteArrayWrapper(key), storageDescriptor); } else { // Just increment the reference count int refCnt = Integer.parseInt(new String(serializedRefCnt, HBaseUtils.ENCODING)) + 1; Put p = new Put(key); p.add(CATALOG_CF, REF_COUNT_COL, Integer.toString(refCnt).getBytes(HBaseUtils.ENCODING)); htab.put(p); } conn.flush(htab); return key; } /** * Print out a storage descriptor. * @param hash hash that is the key of the storage descriptor * @return string version of the storage descriptor */ String printStorageDescriptor(byte[] hash) throws IOException, TException { byte[] serialized = read(SD_TABLE, hash, CATALOG_CF, CATALOG_COL); if (serialized == null) return noSuch(Base64.encodeBase64URLSafeString(hash), "storage descriptor"); return dumpThriftObject(HBaseUtils.deserializeStorageDescriptor(serialized)); } /** * Print all of the storage descriptors. This doesn't take a regular expression since the key * is an md5 hash and it's hard to see how a regex on this would be useful. * @return list of all storage descriptors as strings * @throws IOException * @throws TException */ List<String> printStorageDescriptors() throws IOException, TException { Iterator<Result> results = scan(SD_TABLE, CATALOG_CF, CATALOG_COL); if (!results.hasNext()) return Arrays.asList("No storage descriptors"); List<String> lines = new ArrayList<>(); while (results.hasNext()) { Result result = results.next(); lines.add(Base64.encodeBase64URLSafeString(result.getRow()) + ": " + dumpThriftObject(HBaseUtils.deserializeStorageDescriptor(result.getValue(CATALOG_CF, CATALOG_COL)))); } return lines; } private static class ByteArrayWrapper { byte[] wrapped; ByteArrayWrapper(byte[] b) { wrapped = b; } @Override public boolean equals(Object other) { if (other instanceof ByteArrayWrapper) { return Arrays.equals(((ByteArrayWrapper)other).wrapped, wrapped); } else { return false; } } @Override public int hashCode() { return Arrays.hashCode(wrapped); } } /********************************************************************************************** * Statistics related methods *********************************************************************************************/ /** * Update statistics for one or more columns for a table or a partition. * * @param dbName database the table is in * @param tableName table to update statistics for * @param partVals partition values that define partition to update statistics for. 
If this is * null, then these will be assumed to be table level statistics * @param stats Stats object with stats for one or more columns * @throws IOException */ void updateStatistics(String dbName, String tableName, List<String> partVals, ColumnStatistics stats) throws IOException { byte[] key = getStatisticsKey(dbName, tableName, partVals); String hbaseTable = getStatisticsTable(partVals); byte[][] colnames = new byte[stats.getStatsObjSize()][]; byte[][] serialized = new byte[stats.getStatsObjSize()][]; for (int i = 0; i < stats.getStatsObjSize(); i++) { ColumnStatisticsObj obj = stats.getStatsObj().get(i); serialized[i] = HBaseUtils.serializeStatsForOneColumn(stats, obj); String colname = obj.getColName(); colnames[i] = HBaseUtils.buildKey(colname); } store(hbaseTable, key, STATS_CF, colnames, serialized); } /** * Get statistics for a table * * @param dbName name of database table is in * @param tblName name of table * @param colNames list of column names to get statistics for * @return column statistics for indicated table * @throws IOException */ ColumnStatistics getTableStatistics(String dbName, String tblName, List<String> colNames) throws IOException { byte[] tabKey = HBaseUtils.buildKey(dbName, tblName); ColumnStatistics tableStats = new ColumnStatistics(); ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(); statsDesc.setIsTblLevel(true); statsDesc.setDbName(dbName); statsDesc.setTableName(tblName); tableStats.setStatsDesc(statsDesc); byte[][] colKeys = new byte[colNames.size()][]; for (int i = 0; i < colKeys.length; i++) { colKeys[i] = HBaseUtils.buildKey(colNames.get(i)); } Result result = read(TABLE_TABLE, tabKey, STATS_CF, colKeys); for (int i = 0; i < colKeys.length; i++) { byte[] serializedColStats = result.getValue(STATS_CF, colKeys[i]); if (serializedColStats == null) { // There were no stats for this column, so skip it continue; } ColumnStatisticsObj obj = HBaseUtils.deserializeStatsForOneColumn(tableStats, serializedColStats); obj.setColName(colNames.get(i)); tableStats.addToStatsObj(obj); } return tableStats; } /** * Get statistics for a set of partitions * * @param dbName name of database table is in * @param tblName table partitions are in * @param partNames names of the partitions, used only to set values inside the return stats * objects * @param partVals partition values for each partition, needed because this class doesn't know how * to translate from partName to partVals * @param colNames column names to fetch stats for. These columns will be fetched for all * requested partitions * @return list of ColumnStats, one for each partition for which we found at least one column's * stats. 
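 * <p>Illustrative call (all literal values are hypothetical; partVals must line up with partNames
 * as described above):
 * <pre>{@code
 * List<ColumnStatistics> stats = hrw.getPartitionStatistics("default", "web_logs",
 *     Arrays.asList("ds=2016-01-01", "ds=2016-01-02"),                          // partNames
 *     Arrays.asList(Arrays.asList("2016-01-01"), Arrays.asList("2016-01-02")),  // partVals
 *     Arrays.asList("ip", "bytes_sent"));                                       // colNames
 * // the result has one entry per partition that had stats for at least one requested column
 * }</pre>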
* @throws IOException */ List<ColumnStatistics> getPartitionStatistics(String dbName, String tblName, List<String> partNames, List<List<String>> partVals, List<String> colNames) throws IOException { List<ColumnStatistics> statsList = new ArrayList<>(partNames.size()); Map<List<String>, String> valToPartMap = new HashMap<>(partNames.size()); List<Get> gets = new ArrayList<>(partNames.size() * colNames.size()); assert partNames.size() == partVals.size(); byte[][] colNameBytes = new byte[colNames.size()][]; for (int i = 0; i < colNames.size(); i++) { colNameBytes[i] = HBaseUtils.buildKey(colNames.get(i)); } for (int i = 0; i < partNames.size(); i++) { valToPartMap.put(partVals.get(i), partNames.get(i)); byte[] partKey = HBaseUtils.buildPartitionKey(dbName, tblName, HBaseUtils.getPartitionKeyTypes(getTable(dbName, tblName).getPartitionKeys()), partVals.get(i)); Get get = new Get(partKey); for (byte[] colName : colNameBytes) { get.addColumn(STATS_CF, colName); } gets.add(get); } HTableInterface htab = conn.getHBaseTable(PART_TABLE); Result[] results = htab.get(gets); for (int i = 0; i < results.length; i++) { ColumnStatistics colStats = null; for (int j = 0; j < colNameBytes.length; j++) { byte[] serializedColStats = results[i].getValue(STATS_CF, colNameBytes[j]); if (serializedColStats != null) { if (colStats == null) { // We initialize this late so that we don't create extras in the case of // partitions with no stats colStats = buildColStats(results[i].getRow(), false); statsList.add(colStats); } ColumnStatisticsObj cso = HBaseUtils.deserializeStatsForOneColumn(colStats, serializedColStats); cso.setColName(colNames.get(j)); colStats.addToStatsObj(cso); } } } return statsList; } /** * Get a reference to the stats cache. * @return the stats cache. */ StatsCache getStatsCache() { return statsCache; } /** * Get aggregated stats. Only intended for use by * {@link org.apache.hadoop.hive.metastore.hbase.StatsCache}. Others should not call directly * but should call StatsCache.get instead. * @param key The md5 hash associated with this partition set * @return stats if hbase has them, else null * @throws IOException */ AggrStats getAggregatedStats(byte[] key) throws IOException{ byte[] serialized = read(AGGR_STATS_TABLE, key, CATALOG_CF, AGGR_STATS_STATS_COL); if (serialized == null) return null; return HBaseUtils.deserializeAggrStats(serialized); } /** * Put aggregated stats Only intended for use by * {@link org.apache.hadoop.hive.metastore.hbase.StatsCache}. Others should not call directly * but should call StatsCache.put instead. 
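 * <p>A hedged sketch of the round trip (the key is whatever md5 hash the stats cache computed for
 * this partition set; {@code key}, {@code names} and {@code aggr} are hypothetical):
 * <pre>{@code
 * hrw.putAggregatedStats(key, "default", "web_logs", names, "ip", aggr);
 * AggrStats back = hrw.getAggregatedStats(key);   // returns null once the entry is invalidated
 * }</pre>
 * Alongside the serialized stats a {@link BloomFilter} over the partition names is written, the
 * intent being that {@link #invalidateAggregatedStats} can cheaply test whether a dropped
 * partition contributed to an aggregation without deserializing the stats themselves.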
* @param key The md5 hash associated with this partition set * @param dbName Database these partitions are in * @param tableName Table these partitions are in * @param partNames Partition names * @param colName Column stats are for * @param stats Stats * @throws IOException */ void putAggregatedStats(byte[] key, String dbName, String tableName, List<String> partNames, String colName, AggrStats stats) throws IOException { // Serialize the part names List<String> protoNames = new ArrayList<>(partNames.size() + 3); protoNames.add(dbName); protoNames.add(tableName); protoNames.add(colName); protoNames.addAll(partNames); // Build a bloom Filter for these partitions BloomFilter bloom = new BloomFilter(partNames.size(), STATS_BF_ERROR_RATE); for (String partName : partNames) { bloom.add(partName.getBytes(HBaseUtils.ENCODING)); } byte[] serializedFilter = HBaseUtils.serializeBloomFilter(dbName, tableName, bloom); byte[] serializedStats = HBaseUtils.serializeAggrStats(stats); store(AGGR_STATS_TABLE, key, CATALOG_CF, new byte[][]{AGGR_STATS_BLOOM_COL, AGGR_STATS_STATS_COL}, new byte[][]{serializedFilter, serializedStats}); } // TODO - We shouldn't remove an entry from the cache as soon as a single partition is deleted. // TODO - Instead we should keep track of how many partitions have been deleted and only remove // TODO - an entry once it passes a certain threshold, like 5%, of partitions have been removed. // TODO - That requires moving this from a filter to a co-processor. /** * Invalidate stats associated with the listed partitions. This method is intended for use * only by {@link org.apache.hadoop.hive.metastore.hbase.StatsCache}. * @param filter serialized version of the filter to pass * @return List of md5 hash keys for the partition stat sets that were removed. * @throws IOException */ List<StatsCache.StatsCacheKey> invalidateAggregatedStats(HbaseMetastoreProto.AggrStatsInvalidatorFilter filter) throws IOException { Iterator<Result> results = scan(AGGR_STATS_TABLE, new AggrStatsInvalidatorFilter(filter)); if (!results.hasNext()) return Collections.emptyList(); List<Delete> deletes = new ArrayList<>(); List<StatsCache.StatsCacheKey> keys = new ArrayList<>(); while (results.hasNext()) { Result result = results.next(); deletes.add(new Delete(result.getRow())); keys.add(new StatsCache.StatsCacheKey(result.getRow())); } HTableInterface htab = conn.getHBaseTable(AGGR_STATS_TABLE); htab.delete(deletes); return keys; } private byte[] getStatisticsKey(String dbName, String tableName, List<String> partVals) throws IOException { return partVals == null ? HBaseUtils.buildKey(dbName, tableName) : HBaseUtils .buildPartitionKey(dbName, tableName, HBaseUtils.getPartitionKeyTypes(getTable(dbName, tableName).getPartitionKeys()), partVals); } private String getStatisticsTable(List<String> partVals) { return partVals == null ? 
TABLE_TABLE : PART_TABLE; } private ColumnStatistics buildColStats(byte[] key, boolean fromTable) throws IOException { // We initialize this late so that we don't create extras in the case of // partitions with no stats ColumnStatistics colStats = new ColumnStatistics(); ColumnStatisticsDesc csd = new ColumnStatisticsDesc(); // If this is a table key, parse it as one List<String> reconstructedKey; if (fromTable) { reconstructedKey = Arrays.asList(HBaseUtils.deserializeKey(key)); csd.setIsTblLevel(true); } else { reconstructedKey = HBaseUtils.deserializePartitionKey(key, this); csd.setIsTblLevel(false); } csd.setDbName(reconstructedKey.get(0)); csd.setTableName(reconstructedKey.get(1)); if (!fromTable) { // Build the part name, for which we need the table Table table = getTable(reconstructedKey.get(0), reconstructedKey.get(1)); if (table == null) { throw new RuntimeException("Unable to find table " + reconstructedKey.get(0) + "." + reconstructedKey.get(1) + " even though I have a partition for it!"); } csd.setPartName(HBaseStore.buildExternalPartName(table, reconstructedKey.subList(2, reconstructedKey.size()))); } colStats.setStatsDesc(csd); return colStats; } /********************************************************************************************** * File metadata related methods *********************************************************************************************/ /** * @param fileIds file ID list. * @return Serialized file metadata. */ ByteBuffer[] getFileMetadata(List<Long> fileIds) throws IOException { ByteBuffer[] result = new ByteBuffer[fileIds.size()]; getFileMetadata(fileIds, result); return result; } /** * @param fileIds file ID list. * @return Serialized file metadata. */ @Override public void getFileMetadata(List<Long> fileIds, ByteBuffer[] result) throws IOException { byte[][] keys = new byte[fileIds.size()][]; for (int i = 0; i < fileIds.size(); ++i) { keys[i] = HBaseUtils.makeLongKey(fileIds.get(i)); } multiRead(FILE_METADATA_TABLE, CATALOG_CF, CATALOG_COL, keys, result); } /** * @param fileIds file ID list. * @param metadataBuffers Serialized file metadatas, one per file ID. * @param addedCols The column names for additional columns created by file-format-specific * metadata handler, to be stored in the cache. * @param addedVals The values for addedCols; one value per file ID per added column. */ @Override public void storeFileMetadata(List<Long> fileIds, List<ByteBuffer> metadataBuffers, ByteBuffer[] addedCols, ByteBuffer[][] addedVals) throws IOException, InterruptedException { byte[][] keys = new byte[fileIds.size()][]; for (int i = 0; i < fileIds.size(); ++i) { keys[i] = HBaseUtils.makeLongKey(fileIds.get(i)); } // HBase APIs are weird. To supply bytebuffer value, you have to also have bytebuffer // column name, but not column family. So there. Perhaps we should add these to constants too. ByteBuffer colNameBuf = ByteBuffer.wrap(CATALOG_COL); @SuppressWarnings("deprecation") HTableInterface htab = conn.getHBaseTable(FILE_METADATA_TABLE); List<Row> actions = new ArrayList<>(keys.length); for (int keyIx = 0; keyIx < keys.length; ++keyIx) { ByteBuffer value = (metadataBuffers != null) ? metadataBuffers.get(keyIx) : null; ByteBuffer[] av = addedVals == null ? 
null : addedVals[keyIx]; if (value == null) { actions.add(new Delete(keys[keyIx])); assert av == null; } else { Put p = new Put(keys[keyIx]); p.addColumn(CATALOG_CF, colNameBuf, HConstants.LATEST_TIMESTAMP, value); if (av != null) { assert av.length == addedCols.length; for (int colIx = 0; colIx < addedCols.length; ++colIx) { p.addColumn(STATS_CF, addedCols[colIx], HConstants.LATEST_TIMESTAMP, av[colIx]); } } actions.add(p); } } Object[] results = new Object[keys.length]; htab.batch(actions, results); // TODO: should we check results array? we don't care about partial results conn.flush(htab); } @Override public void storeFileMetadata(long fileId, ByteBuffer metadata, ByteBuffer[] addedCols, ByteBuffer[] addedVals) throws IOException, InterruptedException { @SuppressWarnings("deprecation") HTableInterface htab = conn.getHBaseTable(FILE_METADATA_TABLE); Put p = new Put(HBaseUtils.makeLongKey(fileId)); p.addColumn(CATALOG_CF, ByteBuffer.wrap(CATALOG_COL), HConstants.LATEST_TIMESTAMP, metadata); assert (addedCols == null && addedVals == null) || (addedCols.length == addedVals.length); if (addedCols != null) { for (int i = 0; i < addedCols.length; ++i) { p.addColumn(STATS_CF, addedCols[i], HConstants.LATEST_TIMESTAMP, addedVals[i]); } } htab.put(p); conn.flush(htab); } /********************************************************************************************** * Security related methods *********************************************************************************************/ /** * Fetch a delegation token * @param tokId identifier of the token to fetch * @return the delegation token, or null if there is no such delegation token * @throws IOException */ String getDelegationToken(String tokId) throws IOException { byte[] key = HBaseUtils.buildKey(tokId); byte[] serialized = read(SECURITY_TABLE, key, CATALOG_CF, DELEGATION_TOKEN_COL); if (serialized == null) return null; return HBaseUtils.deserializeDelegationToken(serialized); } /** * Get all delegation token ids * @return list of all delegation token identifiers * @throws IOException */ List<String> scanDelegationTokenIdentifiers() throws IOException { Iterator<Result> iter = scan(SECURITY_TABLE, CATALOG_CF, DELEGATION_TOKEN_COL); List<String> ids = new ArrayList<>(); while (iter.hasNext()) { Result result = iter.next(); byte[] serialized = result.getValue(CATALOG_CF, DELEGATION_TOKEN_COL); if (serialized != null) { // Don't deserialize the value, as what we're after is the key. We just had to check the // value wasn't null in order to check this is a record with a delegation token and not a // master key. 
ids.add(new String(result.getRow(), HBaseUtils.ENCODING)); } } return ids; } /** * Store a delegation token * @param tokId token id * @param token delegation token to store * @throws IOException */ void putDelegationToken(String tokId, String token) throws IOException { byte[][] serialized = HBaseUtils.serializeDelegationToken(tokId, token); store(SECURITY_TABLE, serialized[0], CATALOG_CF, DELEGATION_TOKEN_COL, serialized[1]); } /** * Delete a delegation token * @param tokId identifier of token to drop * @throws IOException */ void deleteDelegationToken(String tokId) throws IOException { byte[] key = HBaseUtils.buildKey(tokId); delete(SECURITY_TABLE, key, CATALOG_CF, DELEGATION_TOKEN_COL); } /** * Fetch a master key * @param seqNo sequence number of the master key * @return the master key, or null if there is no such master key * @throws IOException */ String getMasterKey(Integer seqNo) throws IOException { byte[] key = HBaseUtils.buildKey(seqNo.toString()); byte[] serialized = read(SECURITY_TABLE, key, CATALOG_CF, MASTER_KEY_COL); if (serialized == null) return null; return HBaseUtils.deserializeMasterKey(serialized); } /** * Get all master keys * @return list of all master keys * @throws IOException */ List<String> scanMasterKeys() throws IOException { Iterator<Result> iter = scan(SECURITY_TABLE, CATALOG_CF, MASTER_KEY_COL); List<String> keys = new ArrayList<>(); while (iter.hasNext()) { Result result = iter.next(); byte[] serialized = result.getValue(CATALOG_CF, MASTER_KEY_COL); if (serialized != null) { keys.add(HBaseUtils.deserializeMasterKey(serialized)); } } return keys; } /** * Store a master key * @param seqNo sequence number * @param key master key to store * @throws IOException */ void putMasterKey(Integer seqNo, String key) throws IOException { byte[][] serialized = HBaseUtils.serializeMasterKey(seqNo, key); store(SECURITY_TABLE, serialized[0], CATALOG_CF, MASTER_KEY_COL, serialized[1]); } /** * Delete a master key * @param seqNo sequence number of master key to delete * @throws IOException */ void deleteMasterKey(Integer seqNo) throws IOException { byte[] key = HBaseUtils.buildKey(seqNo.toString()); delete(SECURITY_TABLE, key, CATALOG_CF, MASTER_KEY_COL); } /** * One method to print all rows in the security table. It's not expected to be large. * @return each row as one string * @throws IOException */ List<String> printSecurity() throws IOException { HTableInterface htab = conn.getHBaseTable(SECURITY_TABLE); Scan scan = new Scan(); scan.addColumn(CATALOG_CF, MASTER_KEY_COL); scan.addColumn(CATALOG_CF, DELEGATION_TOKEN_COL); Iterator<Result> iter = htab.getScanner(scan).iterator(); if (!iter.hasNext()) return Arrays.asList("No security related entries"); List<String> lines = new ArrayList<>(); while (iter.hasNext()) { Result result = iter.next(); byte[] val = result.getValue(CATALOG_CF, MASTER_KEY_COL); if (val != null) { int seqNo = Integer.parseInt(new String(result.getRow(), HBaseUtils.ENCODING)); lines.add("Master key " + seqNo + ": " + HBaseUtils.deserializeMasterKey(val)); } else { val = result.getValue(CATALOG_CF, DELEGATION_TOKEN_COL); if (val == null) throw new RuntimeException("Huh? 
No master key, no delegation token!"); lines.add("Delegation token " + new String(result.getRow(), HBaseUtils.ENCODING) + ": " + HBaseUtils.deserializeDelegationToken(val)); } } return lines; } /********************************************************************************************** * Sequence methods *********************************************************************************************/ long peekAtSequence(byte[] sequence) throws IOException { byte[] serialized = read(SEQUENCES_TABLE, sequence, CATALOG_CF, CATALOG_COL); return serialized == null ? 0 : Long.parseLong(new String(serialized, HBaseUtils.ENCODING)); } long getNextSequence(byte[] sequence) throws IOException { byte[] serialized = read(SEQUENCES_TABLE, sequence, CATALOG_CF, CATALOG_COL); long val = 0; if (serialized != null) { val = Long.parseLong(new String(serialized, HBaseUtils.ENCODING)); } byte[] incrSerialized = Long.toString(val + 1).getBytes(HBaseUtils.ENCODING); store(SEQUENCES_TABLE, sequence, CATALOG_CF, CATALOG_COL, incrSerialized); return val; } /** * One method to print all entries in the sequence table. It's not expected to be large. * @return each sequence as one string * @throws IOException */ List<String> printSequences() throws IOException { HTableInterface htab = conn.getHBaseTable(SEQUENCES_TABLE); Iterator<Result> iter = scan(SEQUENCES_TABLE, CATALOG_CF, CATALOG_COL, null); List<String> sequences = new ArrayList<>(); if (!iter.hasNext()) return Arrays.asList("No sequences"); while (iter.hasNext()) { Result result = iter.next(); sequences.add(new StringBuilder(new String(result.getRow(), HBaseUtils.ENCODING)) .append(": ") .append(new String(result.getValue(CATALOG_CF, CATALOG_COL), HBaseUtils.ENCODING)) .toString()); } return sequences; } /********************************************************************************************** * Constraints (pk/fk) related methods *********************************************************************************************/ /** * Fetch a primary key * @param dbName database the table is in * @param tableName table name * @return List of primary key objects, which together make up one key * @throws IOException if there's a read error */ List<SQLPrimaryKey> getPrimaryKey(String dbName, String tableName) throws IOException { byte[] key = HBaseUtils.buildKey(dbName, tableName); byte[] serialized = read(TABLE_TABLE, key, CATALOG_CF, PRIMARY_KEY_COL); if (serialized == null) return null; return HBaseUtils.deserializePrimaryKey(dbName, tableName, serialized); } /** * Fetch the foreign keys for a table * @param dbName database the table is in * @param tableName table name * @return All of the foreign key columns thrown together in one list. Have fun sorting them out. * @throws IOException if there's a read error */ List<SQLForeignKey> getForeignKeys(String dbName, String tableName) throws IOException { byte[] key = HBaseUtils.buildKey(dbName, tableName); byte[] serialized = read(TABLE_TABLE, key, CATALOG_CF, FOREIGN_KEY_COL); if (serialized == null) return null; return HBaseUtils.deserializeForeignKeys(dbName, tableName, serialized); } /** * Create a primary key on a table. * @param pk Primary key for this table * @throws IOException if unable to write the data to the store. */ void putPrimaryKey(List<SQLPrimaryKey> pk) throws IOException { byte[][] serialized = HBaseUtils.serializePrimaryKey(pk); store(TABLE_TABLE, serialized[0], CATALOG_CF, PRIMARY_KEY_COL, serialized[1]); } /** * Create one or more foreign keys on a table. 
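 * <p>A hedged sketch of the read-modify-write pattern spelled out in the rest of this comment
 * ({@code newFk} and the names are hypothetical):
 * <pre>{@code
 * List<SQLForeignKey> fks = hrw.getForeignKeys("default", "web_logs");
 * if (fks == null) fks = new ArrayList<>();
 * fks.add(newFk);              // the key being added
 * hrw.putForeignKeys(fks);     // replaces the stored list with old + new
 * }</pre>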
Note that this will not add a foreign key; it * will overwrite whatever is there. So if you wish to add a key to a table that may already * have foreign keys, you need to first use {@link #getForeignKeys(String, String)} to fetch the * existing keys, add to the list, and then call this. * @param fks Foreign key(s) for this table * @throws IOException if unable to write the data to the store. */ void putForeignKeys(List<SQLForeignKey> fks) throws IOException { byte[][] serialized = HBaseUtils.serializeForeignKeys(fks); store(TABLE_TABLE, serialized[0], CATALOG_CF, FOREIGN_KEY_COL, serialized[1]); } /** * Drop the primary key from a table. * @param dbName database the table is in * @param tableName table name * @throws IOException if unable to delete from the store */ void deletePrimaryKey(String dbName, String tableName) throws IOException { byte[] key = HBaseUtils.buildKey(dbName, tableName); delete(TABLE_TABLE, key, CATALOG_CF, PRIMARY_KEY_COL); } /** * Drop all foreign keys from a table. Note that this will drop all keys blindly. You should * only call this if you're sure you want to drop them all. If you just want to drop one you * should instead call {@link #getForeignKeys(String, String)}, modify the list it returns, and * then call {@link #putForeignKeys(List)}. * @param dbName database the table is in * @param tableName table name * @throws IOException if unable to delete from the store */ void deleteForeignKeys(String dbName, String tableName) throws IOException { byte[] key = HBaseUtils.buildKey(dbName, tableName); delete(TABLE_TABLE, key, CATALOG_CF, FOREIGN_KEY_COL); } /********************************************************************************************** * Cache methods *********************************************************************************************/ /** * This should be called whenever a new query is started. 
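 * <p>A minimal illustration of the intended call site (hypothetical; this class does not dictate
 * where the call is made, only that per-query caches should not be reused across queries):
 * <pre>{@code
 * // at the start of each metastore query/operation
 * hrw.flushCatalogCache();
 * }</pre>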
*/ void flushCatalogCache() { if (LOG.isDebugEnabled()) { for (Counter counter : counters) { LOG.debug(counter.dump()); counter.clear(); } statsCache.dumpCounters(); } tableCache.flush(); sdCache.flush(); partCache.flush(); flushRoleCache(); } private void flushRoleCache() { roleCache.clear(); entireRoleTableInCache = false; } /********************************************************************************************** * General access methods *********************************************************************************************/ private void store(String table, byte[] key, byte[] colFam, byte[] colName, byte[] obj) throws IOException { HTableInterface htab = conn.getHBaseTable(table); Put p = new Put(key); p.add(colFam, colName, obj); htab.put(p); conn.flush(htab); } private void store(String table, byte[] key, byte[] colFam, byte[][] colName, byte[][] obj) throws IOException { HTableInterface htab = conn.getHBaseTable(table); Put p = new Put(key); for (int i = 0; i < colName.length; i++) { p.add(colFam, colName[i], obj[i]); } htab.put(p); conn.flush(htab); } private byte[] read(String table, byte[] key, byte[] colFam, byte[] colName) throws IOException { HTableInterface htab = conn.getHBaseTable(table); Get g = new Get(key); g.addColumn(colFam, colName); Result res = htab.get(g); return res.getValue(colFam, colName); } private void multiRead(String table, byte[] colFam, byte[] colName, byte[][] keys, ByteBuffer[] resultDest) throws IOException { assert keys.length == resultDest.length; @SuppressWarnings("deprecation") HTableInterface htab = conn.getHBaseTable(table); List<Get> gets = new ArrayList<>(keys.length); for (byte[] key : keys) { Get g = new Get(key); g.addColumn(colFam, colName); gets.add(g); } Result[] results = htab.get(gets); for (int i = 0; i < results.length; ++i) { Result r = results[i]; if (r.isEmpty()) { resultDest[i] = null; } else { Cell cell = r.getColumnLatestCell(colFam, colName); resultDest[i] = ByteBuffer.wrap( cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); } } } private void multiModify(String table, byte[][] keys, byte[] colFam, byte[] colName, List<ByteBuffer> values) throws IOException, InterruptedException { assert values == null || keys.length == values.size(); // HBase APIs are weird. To supply bytebuffer value, you have to also have bytebuffer // column name, but not column family. So there. Perhaps we should add these to constants too. ByteBuffer colNameBuf = ByteBuffer.wrap(colName); @SuppressWarnings("deprecation") HTableInterface htab = conn.getHBaseTable(table); List<Row> actions = new ArrayList<>(keys.length); for (int i = 0; i < keys.length; ++i) { ByteBuffer value = (values != null) ? values.get(i) : null; if (value == null) { actions.add(new Delete(keys[i])); } else { Put p = new Put(keys[i]); p.addColumn(colFam, colNameBuf, HConstants.LATEST_TIMESTAMP, value); actions.add(p); } } Object[] results = new Object[keys.length]; htab.batch(actions, results); // TODO: should we check results array? we don't care about partial results conn.flush(htab); } private Result read(String table, byte[] key, byte[] colFam, byte[][] colNames) throws IOException { HTableInterface htab = conn.getHBaseTable(table); Get g = new Get(key); for (byte[] colName : colNames) g.addColumn(colFam, colName); return htab.get(g); } // Delete a row. If colFam and colName are not null, then only the named column will be // deleted. If colName is null and colFam is not, only the named family will be deleted. 
If // both are null the entire row will be deleted. private void delete(String table, byte[] key, byte[] colFam, byte[] colName) throws IOException { HTableInterface htab = conn.getHBaseTable(table); Delete d = new Delete(key); if (colName != null) d.deleteColumn(colFam, colName); else if (colFam != null) d.deleteFamily(colFam); htab.delete(d); } private Iterator<Result> scan(String table, byte[] colFam, byte[] colName) throws IOException { return scan(table, null, null, colFam, colName, null); } private Iterator<Result> scan(String table, byte[] colFam, byte[] colName, Filter filter) throws IOException { return scan(table, null, null, colFam, colName, filter); } private Iterator<Result> scan(String table, Filter filter) throws IOException { return scan(table, null, null, null, null, filter); } private Iterator<Result> scan(String table, byte[] keyStart, byte[] keyEnd, byte[] colFam, byte[] colName, Filter filter) throws IOException { HTableInterface htab = conn.getHBaseTable(table); Scan s = new Scan(); if (keyStart != null) { s.setStartRow(keyStart); } if (keyEnd != null) { s.setStopRow(keyEnd); } if (colFam != null && colName != null) { s.addColumn(colFam, colName); } if (filter != null) { s.setFilter(filter); } ResultScanner scanner = htab.getScanner(s); return scanner.iterator(); } /********************************************************************************************** * Printing methods *********************************************************************************************/ private String noSuch(String name, String type) { return "No such " + type + ": " + name.replaceAll(HBaseUtils.KEY_SEPARATOR_STR, "."); } private List<String> noMatch(String regex, String type) { return Arrays.asList("No matching " + type + ": " + regex); } private String dumpThriftObject(TBase obj) throws TException, UnsupportedEncodingException { TMemoryBuffer buf = new TMemoryBuffer(1000); TProtocol protocol = new TSimpleJSONProtocol(buf); obj.write(protocol); return buf.toString("UTF-8"); } /********************************************************************************************** * Testing methods and classes *********************************************************************************************/ @VisibleForTesting int countStorageDescriptor() throws IOException { ResultScanner scanner = conn.getHBaseTable(SD_TABLE).getScanner(new Scan()); int cnt = 0; Result r; do { r = scanner.next(); if (r != null) { LOG.debug("Saw record with hash " + Base64.encodeBase64String(r.getRow())); cnt++; } } while (r != null); return cnt; } /** * Use this for unit testing only, so that a mock connection object can be passed in. * @param connection Mock connection objecct */ @VisibleForTesting static void setTestConnection(HBaseConnection connection) { testConn = connection; } // For testing without the cache private static class BogusObjectCache<K, V> extends ObjectCache<K, V> { static Counter bogus = new Counter("bogus"); BogusObjectCache() { super(1, bogus, bogus, bogus); } @Override V get(K key) { return null; } } private static class BogusPartitionCache extends PartitionCache { static Counter bogus = new Counter("bogus"); BogusPartitionCache() { super(1, bogus, bogus, bogus); } @Override Collection<Partition> getAllForTable(String dbName, String tableName) { return null; } @Override Partition get(String dbName, String tableName, List<String> partVals) { return null; } } }