/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hive.metastore.hbase;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.protobuf.ByteString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregator;
import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregatorFactory;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
/**
* A cache for stats. This is only intended for use by
* {@link org.apache.hadoop.hive.metastore.hbase.HBaseReadWrite} and should not be used outside
* that class.
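 * <p>A minimal usage sketch (table, partition, and column names are illustrative):
 * <pre>{@code
 *   StatsCache cache = StatsCache.getInstance(conf);
 *   AggrStats stats = cache.get("default", "web_logs", partNames, "ip");
 *   // After a partition is dropped or its stats change:
 *   cache.invalidate("default", "web_logs", "ds=2016-01-01");
 * }</pre>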
*/
class StatsCache {
  private static final Logger LOG = LoggerFactory.getLogger(StatsCache.class);
private static StatsCache self = null;
private LoadingCache<StatsCacheKey, AggrStats> cache;
private Invalidator invalidator;
  // Read by the invalidator thread and written by the constructor and test hooks,
  // so these must be volatile for cross-thread visibility.
  private volatile long runInvalidatorEvery;
  private volatile long maxTimeInCache;
  private volatile boolean invalidatorHasRun;
@VisibleForTesting Counter misses;
@VisibleForTesting Counter hbaseHits;
@VisibleForTesting Counter totalGets;
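  /**
   * Get the singleton instance, creating it on first use.
   * @param conf configuration used to size and tune the cache
   * @return the shared StatsCache
   */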
static synchronized StatsCache getInstance(Configuration conf) {
if (self == null) {
self = new StatsCache(conf);
}
return self;
}
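  // Wires up the in-memory cache: bounded entry count, per-entry TTL, and a loader that
  // falls back to HBase (and then to on-the-fly aggregation) on a miss.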
private StatsCache(final Configuration conf) {
final StatsCache me = this;
cache = CacheBuilder.newBuilder()
.maximumSize(
HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES))
.expireAfterWrite(HiveConf.getTimeVar(conf,
HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_MEMORY_TTL, TimeUnit.SECONDS), TimeUnit.SECONDS)
.build(new CacheLoader<StatsCacheKey, AggrStats>() {
@Override
public AggrStats load(StatsCacheKey key) throws Exception {
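            // Cache miss in memory.  Check the pre-aggregated stats table in HBase first;
            // on a miss there, aggregate from per-partition stats and write the result back.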
int numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
            boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(conf,
                HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION);
HBaseReadWrite hrw = HBaseReadWrite.getInstance();
AggrStats aggrStats = hrw.getAggregatedStats(key.hashed);
if (aggrStats == null) {
              misses.incr();
              aggrStats = new AggrStats();
              LOG.debug("Unable to find aggregated stats for {}, aggregating", key.colName);
              List<ColumnStatistics> css = hrw.getPartitionStatistics(key.dbName, key.tableName,
                  key.partNames, HBaseStore.partNameListToValsList(key.partNames),
                  Collections.singletonList(key.colName));
              if (css != null && !css.isEmpty()) {
                aggrStats.setPartsFound(css.size());
                // Choose the aggregator from the stats type of the first object; all stats
                // objects for a single column are assumed to share the same type.
                ColumnStatsAggregator aggregator =
                    ColumnStatsAggregatorFactory.getColumnStatsAggregator(
                        css.get(0).getStatsObj().get(0).getStatsData().getSetField(),
                        numBitVectors, useDensityFunctionForNDVEstimation);
                ColumnStatisticsObj statsObj =
                    aggregator.aggregate(key.colName, key.partNames, css);
                aggrStats.addToColStats(statsObj);
                me.put(key, aggrStats);
              }
} else {
hbaseHits.incr();
}
return aggrStats;
}
});
misses = new Counter("Stats cache table misses");
hbaseHits = new Counter("Stats cache table hits");
totalGets = new Counter("Total get calls to the stats cache");
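    // How long an aggregate may live in the HBase table before the invalidator drops it.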
maxTimeInCache = HiveConf.getTimeVar(conf,
HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_HBASE_TTL, TimeUnit.SECONDS);
// We want runEvery in milliseconds, even though we give the default value in the conf in
// seconds.
runInvalidatorEvery = HiveConf.getTimeVar(conf,
HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY, TimeUnit.MILLISECONDS);
invalidator = new Invalidator();
invalidator.setDaemon(true);
invalidator.start();
}
  /**
   * Add an entry to the cache.  The entry is also written through to the aggregate stats
   * table in HBase.
   * @param key key for this entry
   * @param aggrStats aggregated stats for this entry
   * @throws java.io.IOException if the write to HBase fails
   */
void put(StatsCacheKey key, AggrStats aggrStats) throws IOException {
    HBaseReadWrite.getInstance().putAggregatedStats(key.hashed, key.dbName, key.tableName,
        key.partNames, key.colName, aggrStats);
cache.put(key, aggrStats);
}
  /**
   * Get aggregated stats for a column across a set of partitions.  Checks the memory cache
   * first, then the aggregate table in HBase, and finally aggregates from per-partition
   * statistics.
   * @param dbName name of the database the table is in
   * @param tableName name of table
   * @param partNames names of the partitions
   * @param colName name of column to get stats for
   * @return aggregated stats for this column; never null, though it may contain no column
   *         stats if none could be found or computed
   * @throws java.io.IOException
   */
AggrStats get(String dbName, String tableName, List<String> partNames, String colName)
throws IOException {
totalGets.incr();
StatsCacheKey key = new StatsCacheKey(dbName, tableName, partNames, colName);
try {
return cache.get(key);
} catch (ExecutionException e) {
throw new IOException(e);
}
}
  /**
   * Remove all cached entries that include a particular partition.  This should be called
   * when the partition is dropped or its stats are updated.  The work is asynchronous: the
   * request is queued for the invalidator thread.
   * @param dbName name of the database the table is in
   * @param tableName name of table
   * @param partName name of the partition
   * @throws IOException
   */
void invalidate(String dbName, String tableName, String partName)
throws IOException {
invalidator.addToQueue(
HbaseMetastoreProto.AggrStatsInvalidatorFilter.Entry.newBuilder()
.setDbName(ByteString.copyFrom(dbName.getBytes(HBaseUtils.ENCODING)))
.setTableName(ByteString.copyFrom(tableName.getBytes(HBaseUtils.ENCODING)))
.setPartName(ByteString.copyFrom(partName.getBytes(HBaseUtils.ENCODING)))
.build());
}
  void dumpCounters() {
    if (LOG.isDebugEnabled()) {
      LOG.debug(misses.dump());
      LOG.debug(hbaseHits.dump());
      LOG.debug(totalGets.dump());
    }
  }
  /**
   * Completely flush the cache from memory.  Used in tests to verify that stats can be
   * read from HBase itself.
   * @throws IOException
   */
@VisibleForTesting void flushMemory() throws IOException {
cache.invalidateAll();
}
@VisibleForTesting void resetCounters() {
misses.clear();
hbaseHits.clear();
totalGets.clear();
}
@VisibleForTesting void setRunInvalidatorEvery(long runEvery) {
runInvalidatorEvery = runEvery;
}
@VisibleForTesting void setMaxTimeInCache(long maxTime) {
maxTimeInCache = maxTime;
}
@VisibleForTesting void wakeInvalidator() throws InterruptedException {
invalidatorHasRun = false;
// Wait through 2 cycles so we're sure our entry won't be picked as too new.
Thread.sleep(2 * runInvalidatorEvery);
invalidator.interrupt();
while (!invalidatorHasRun) {
Thread.sleep(10);
}
}
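  /**
   * Cache key built from an MD5 digest of the database name, table name, sorted partition
   * names, and column name.  Equality and hashing use only the digest, so a key
   * reconstructed from a stored hash compares equal to one built from its components.
   */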
static class StatsCacheKey {
final byte[] hashed;
String dbName;
String tableName;
List<String> partNames;
String colName;
StatsCacheKey(byte[] key) {
hashed = key;
}
    StatsCacheKey(String dbName, String tableName, List<String> partNames, String colName) {
      this.dbName = dbName;
      this.tableName = tableName;
      // Copy before sorting so the caller's list is not mutated.
      this.partNames = new ArrayList<>(partNames);
      this.colName = colName;
      MessageDigest md;
      try {
        md = MessageDigest.getInstance("MD5");
      } catch (NoSuchAlgorithmException e) {
        throw new RuntimeException(e);
      }
      md.update(dbName.getBytes(HBaseUtils.ENCODING));
      md.update(tableName.getBytes(HBaseUtils.ENCODING));
      // Sort so the same set of partitions always produces the same digest.
      Collections.sort(this.partNames);
      for (String s : this.partNames) {
        md.update(s.getBytes(HBaseUtils.ENCODING));
      }
      md.update(colName.getBytes(HBaseUtils.ENCODING));
      hashed = md.digest();
    }
@Override
public boolean equals(Object other) {
      if (!(other instanceof StatsCacheKey)) return false;
      StatsCacheKey that = (StatsCacheKey) other;
return Arrays.equals(hashed, that.hashed);
}
@Override
public int hashCode() {
return Arrays.hashCode(hashed);
}
}
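  /**
   * Daemon thread that periodically drains queued invalidation requests, asks HBase to drop
   * the matching aggregates, and evicts the corresponding entries from the memory cache.
   */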
private class Invalidator extends Thread {
private List<HbaseMetastoreProto.AggrStatsInvalidatorFilter.Entry> entries = new ArrayList<>();
private Lock lock = new ReentrantLock();
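    /** Queue an entry for the invalidator's next sweep; safe to call from any thread. */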
void addToQueue(HbaseMetastoreProto.AggrStatsInvalidatorFilter.Entry entry) {
lock.lock();
try {
entries.add(entry);
} finally {
lock.unlock();
}
}
@Override
public void run() {
while (true) {
long startedAt = System.currentTimeMillis();
List<HbaseMetastoreProto.AggrStatsInvalidatorFilter.Entry> thisRun = null;
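        // Swap out the pending list under the lock so producers block only briefly, then
        // process the drained batch without holding the lock.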
lock.lock();
try {
if (entries.size() > 0) {
thisRun = entries;
entries = new ArrayList<>();
}
} finally {
lock.unlock();
}
if (thisRun != null) {
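          // Describe everything queued this cycle in one filter; HBaseReadWrite invalidates
          // the matching aggregates and returns their keys so the in-memory copies can be
          // evicted as well.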
try {
HbaseMetastoreProto.AggrStatsInvalidatorFilter filter =
HbaseMetastoreProto.AggrStatsInvalidatorFilter.newBuilder()
.setRunEvery(runInvalidatorEvery)
.setMaxCacheEntryLife(maxTimeInCache)
.addAllToInvalidate(thisRun)
.build();
List<StatsCacheKey> keys =
HBaseReadWrite.getInstance().invalidateAggregatedStats(filter);
cache.invalidateAll(keys);
} catch (IOException e) {
// Not a lot I can do here
LOG.error("Caught error while invalidating entries in the cache", e);
}
}
invalidatorHasRun = true;
try {
          // Guard against a negative sleep if this sweep ran longer than the interval.
          long sleepTime = runInvalidatorEvery - (System.currentTimeMillis() - startedAt);
          if (sleepTime > 0) {
            sleep(sleepTime);
          }
        } catch (InterruptedException e) {
          LOG.warn("Interrupted while sleeping", e);
}
}
}
}
}