/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.metadata;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.StringInternUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Sets;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
 * Verify that the information in the metastore matches what is on the
 * filesystem. Fill the supplied {@link CheckResult} with lists of missing
 * and unexpected tables and partitions.
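 *
 * <p>A minimal usage sketch (the database name here is illustrative):
 * <pre>{@code
 * HiveMetaStoreChecker checker = new HiveMetaStoreChecker(Hive.get(conf));
 * CheckResult result = new CheckResult();
 * // null table name => check every table in the database
 * checker.checkMetastore("default", null, null, result);
 * // result now holds the missing/unexpected tables and partitions
 * }</pre>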
*/
public class HiveMetaStoreChecker {
public static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreChecker.class);
public static final String CLASS_NAME = HiveMetaStoreChecker.class.getName();
private final Hive hive;
private final HiveConf conf;
  public HiveMetaStoreChecker(Hive hive) {
    this.hive = hive;
    conf = hive.getConf();
  }
/**
   * Check the metastore for inconsistencies: data missing either from the
   * metastore or from the filesystem.
   *
   * @param dbName
   *          Name of the database; if null or empty, the default database is
   *          used.
   * @param tableName
   *          Table to run the check for. If null, all tables in the database
   *          are checked.
   * @param partitions
   *          List of partition name/value pairs; if null or empty, all
   *          partitions are checked.
   * @param result
   *          Filled with the results of the check.
   * @throws HiveException
   *           Failed to get required information from the metastore.
   * @throws IOException
   *           Most likely filesystem related.
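   *
   * <p>A sketch of checking one specific partition, assuming the checker and
   * result from the class-level sketch (the partition column {@code ds} and
   * its value are illustrative):
   * <pre>{@code
   * Map<String, String> spec = new HashMap<>();
   * spec.put("ds", "2008-01-01");
   * checker.checkMetastore("default", "clicks",
   *     Collections.singletonList(spec), result);
   * }</pre>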
*/
public void checkMetastore(String dbName, String tableName,
List<? extends Map<String, String>> partitions, CheckResult result)
throws HiveException, IOException {
    if (dbName == null || dbName.isEmpty()) {
dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
}
try {
      if (tableName == null || tableName.isEmpty()) {
// no table specified, check all tables and all partitions.
List<String> tables = hive.getTablesForDb(dbName, ".*");
for (String currentTableName : tables) {
checkTable(dbName, currentTableName, null, result);
}
findUnknownTables(dbName, tables, result);
} else if (partitions == null || partitions.isEmpty()) {
// only one table, let's check all partitions
checkTable(dbName, tableName, null, result);
} else {
// check the specified partitions
checkTable(dbName, tableName, partitions, result);
}
LOG.info("Number of partitionsNotInMs=" + result.getPartitionsNotInMs()
+ ", partitionsNotOnFs=" + result.getPartitionsNotOnFs()
+ ", tablesNotInMs=" + result.getTablesNotInMs()
+ ", tablesNotOnFs=" + result.getTablesNotOnFs());
    } catch (TException e) {
      // MetaException is a TException, so one catch block covers both.
      throw new HiveException(e);
    }
}
/**
* Check for table directories that aren't in the metastore.
*
* @param dbName
* Name of the database
* @param tables
* List of table names
* @param result
* Add any found tables to this
* @throws HiveException
* Failed to get required information from the metastore.
* @throws IOException
* Most likely filesystem related
* @throws MetaException
* Failed to get required information from the metastore.
* @throws NoSuchObjectException
* Failed to get required information from the metastore.
* @throws TException
* Thrift communication error.
*/
void findUnknownTables(String dbName, List<String> tables, CheckResult result)
throws IOException, MetaException, TException, HiveException {
Set<Path> dbPaths = new HashSet<Path>();
Set<String> tableNames = new HashSet<String>(tables);
for (String tableName : tables) {
Table table = hive.getTable(dbName, tableName);
      // HACK: this derives the database paths from managed table locations;
      // ideally we would get the db paths from the metastore directly
String isExternal = table.getParameters().get("EXTERNAL");
if (isExternal == null || !"TRUE".equalsIgnoreCase(isExternal)) {
dbPaths.add(table.getPath().getParent());
}
}
for (Path dbPath : dbPaths) {
FileSystem fs = dbPath.getFileSystem(conf);
FileStatus[] statuses = fs.listStatus(dbPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
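      // Any visible directory under the database path whose name is not a
      // known table is reported as missing from the metastore.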
for (FileStatus status : statuses) {
        if (status.isDirectory() && !tableNames.contains(status.getPath().getName())) {
result.getTablesNotInMs().add(status.getPath().getName());
}
}
}
}
/**
   * Check the metastore for inconsistencies: data missing either from the
   * metastore or from the filesystem.
*
* @param dbName
* Name of the database
* @param tableName
* Name of the table
* @param partitions
* Partitions to check, if null or empty get all the partitions.
* @param result
* Result object
* @throws HiveException
* Failed to get required information from the metastore.
* @throws IOException
* Most likely filesystem related
* @throws MetaException
* Failed to get required information from the metastore.
*/
void checkTable(String dbName, String tableName,
List<? extends Map<String, String>> partitions, CheckResult result)
throws MetaException, IOException, HiveException {
Table table = null;
try {
table = hive.getTable(dbName, tableName);
} catch (HiveException e) {
result.getTablesNotInMs().add(tableName);
return;
}
PartitionIterable parts;
boolean findUnknownPartitions = true;
if (table.isPartitioned()) {
if (partitions == null || partitions.isEmpty()) {
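        // In "strict" mode, iterate over partitions lazily in metastore-sized
        // batches to bound memory use; otherwise fetch all partitions eagerly.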
String mode = HiveConf.getVar(conf, ConfVars.HIVEMAPREDMODE, (String) null);
if ("strict".equalsIgnoreCase(mode)) {
parts = new PartitionIterable(hive, table, null, conf.getIntVar(
HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
} else {
List<Partition> loadedPartitions = new ArrayList<>();
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
loadedPartitions.addAll(hive.getAllPartitionsOf(table));
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
parts = new PartitionIterable(loadedPartitions);
}
} else {
// we're interested in specific partitions,
// don't check for any others
findUnknownPartitions = false;
List<Partition> loadedPartitions = new ArrayList<>();
for (Map<String, String> map : partitions) {
Partition part = hive.getPartition(table, map, false);
if (part == null) {
PartitionResult pr = new PartitionResult();
pr.setTableName(tableName);
pr.setPartitionName(Warehouse.makePartPath(map));
result.getPartitionsNotInMs().add(pr);
} else {
loadedPartitions.add(part);
}
}
parts = new PartitionIterable(loadedPartitions);
}
} else {
parts = new PartitionIterable(Collections.<Partition>emptyList());
}
checkTable(table, parts, findUnknownPartitions, result);
}
/**
   * Check the metastore for inconsistencies: data missing either from the
   * metastore or from the filesystem.
   *
   * @param table
   *          Table to check
   * @param parts
   *          Partitions to check
   * @param findUnknownPartitions
   *          Should we try to find unknown partitions?
   * @param result
   *          Result object
   * @throws IOException
   *           Could not get information from the filesystem
   * @throws HiveException
   *           Could not create a Partition object
*/
void checkTable(Table table, PartitionIterable parts,
boolean findUnknownPartitions, CheckResult result) throws IOException,
HiveException {
Path tablePath = table.getPath();
FileSystem fs = tablePath.getFileSystem(conf);
if (!fs.exists(tablePath)) {
result.getTablesNotOnFs().add(table.getTableName());
return;
}
Set<Path> partPaths = new HashSet<Path>();
// check that the partition folders exist on disk
for (Partition partition : parts) {
if (partition == null) {
// most likely the user specified an invalid partition
continue;
}
Path partPath = partition.getDataLocation();
fs = partPath.getFileSystem(conf);
if (!fs.exists(partPath)) {
PartitionResult pr = new PartitionResult();
pr.setPartitionName(partition.getName());
pr.setTableName(partition.getTable().getTableName());
result.getPartitionsNotOnFs().add(pr);
}
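      // Record the partition directory and each of its ancestors (one level
      // per partition column) as known, so intermediate directories are not
      // later reported as unexpected.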
for (int i = 0; i < partition.getSpec().size(); i++) {
Path qualifiedPath = partPath.makeQualified(fs);
StringInternUtils.internUriStringsInPath(qualifiedPath);
partPaths.add(qualifiedPath);
partPath = partPath.getParent();
}
}
if (findUnknownPartitions) {
findUnknownPartitions(table, partPaths, result);
}
}
/**
* Find partitions on the fs that are unknown to the metastore.
*
* @param table
* Table where the partitions would be located
* @param partPaths
* Paths of the partitions the ms knows about
* @param result
* Result object
* @throws IOException
* Thrown if we fail at fetching listings from the fs.
   * @throws HiveException
   *           Thrown if a malformed partition path is found and path
   *           validation is configured to throw.
*/
void findUnknownPartitions(Table table, Set<Path> partPaths,
CheckResult result) throws IOException, HiveException {
Path tablePath = table.getPath();
// now check the table folder and see if we find anything
// that isn't in the metastore
Set<Path> allPartDirs = new HashSet<Path>();
checkPartitionDirs(tablePath, allPartDirs, Collections.unmodifiableList(table.getPartColNames()));
// don't want the table dir
allPartDirs.remove(tablePath);
// remove the partition paths we know about
allPartDirs.removeAll(partPaths);
Set<String> partColNames = Sets.newHashSet();
for(FieldSchema fSchema : table.getPartCols()) {
partColNames.add(fSchema.getName());
}
// we should now only have the unexpected folders left
for (Path partPath : allPartDirs) {
FileSystem fs = partPath.getFileSystem(conf);
String partitionName = getPartitionName(fs.makeQualified(tablePath),
partPath, partColNames);
LOG.debug("PartitionName: " + partitionName);
if (partitionName != null) {
PartitionResult pr = new PartitionResult();
pr.setPartitionName(partitionName);
pr.setTableName(table.getTableName());
result.getPartitionsNotInMs().add(pr);
}
}
LOG.debug("Number of partitions not in metastore : " + result.getPartitionsNotInMs().size());
}
/**
* Get the partition name from the path.
*
* @param tablePath
* Path of the table.
* @param partitionPath
* Path of the partition.
* @param partCols
* Set of partition columns from table definition
* @return Partition name, for example partitiondate=2008-01-01
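   *
   * <p>For example (paths purely illustrative), with partition columns
   * {@code a} and {@code b}, tablePath {@code /warehouse/t} and
   * partitionPath {@code /warehouse/t/a=1/b=2}, this returns
   * {@code a=1/b=2}.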
*/
static String getPartitionName(Path tablePath, Path partitionPath,
Set<String> partCols) {
String result = null;
Path currPath = partitionPath;
LOG.debug("tablePath:" + tablePath + ", partCols: " + partCols);
while (currPath != null && !tablePath.equals(currPath)) {
      // Directory names have the form partCol=value. String.split never
      // returns null, so only the element count needs checking.
      String[] parts = currPath.getName().split("=");
      if (parts.length != 2) {
        LOG.warn(currPath.getName() + " is not a valid partition name");
        return result;
      }
      String partitionName = parts[0];
      // Prepend this path component only when its name matches a partition
      // column of the table.
      if (partCols.contains(partitionName)) {
        if (result == null) {
          result = currPath.getName();
        } else {
          result = currPath.getName() + Path.SEPARATOR + result;
        }
      }
currPath = currPath.getParent();
LOG.debug("currPath=" + currPath);
}
return result;
}
/**
* Assume that depth is 2, i.e., partition columns are a and b
* tblPath/a=1 => throw exception
* tblPath/a=1/file => throw exception
* tblPath/a=1/b=2/file => return a=1/b=2
* tblPath/a=1/b=2/c=3 => return a=1/b=2
* tblPath/a=1/b=2/c=3/file => return a=1/b=2
*
* @param basePath
* Start directory
* @param allDirs
* This set will contain the leaf paths at the end.
   * @param partColNames
   *          Partition column names; the search descends one directory level
   *          per column.
   * @throws IOException
   *           Thrown if we can't get listings from the fs.
   * @throws HiveException
*/
private void checkPartitionDirs(Path basePath, Set<Path> allDirs, final List<String> partColNames) throws IOException, HiveException {
    // Reuse the METASTORE_FS_HANDLER_THREADS_COUNT setting for the pool size
    // here, since that gives better performance: the missing partitions
    // discovered are later added by the metastore using a thread pool of size
    // METASTORE_FS_HANDLER_THREADS_COUNT, and if the two pools differ in
    // size, the smaller one becomes the bottleneck.
int poolSize = conf.getInt(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT.varname, 15);
ExecutorService executor;
if (poolSize <= 1) {
LOG.debug("Using single-threaded version of MSCK-GetPaths");
executor = MoreExecutors.sameThreadExecutor();
} else {
LOG.debug("Using multi-threaded version of MSCK-GetPaths with number of threads " + poolSize);
ThreadFactory threadFactory =
new ThreadFactoryBuilder().setDaemon(true).setNameFormat("MSCK-GetPaths-%d").build();
      executor = Executors.newFixedThreadPool(poolSize, threadFactory);
}
checkPartitionDirs(executor, basePath, allDirs, basePath.getFileSystem(conf), partColNames);
executor.shutdown();
}
private final class PathDepthInfoCallable implements Callable<Path> {
private final List<String> partColNames;
private final FileSystem fs;
private final ConcurrentLinkedQueue<PathDepthInfo> pendingPaths;
private final boolean throwException;
private final PathDepthInfo pd;
private PathDepthInfoCallable(PathDepthInfo pd, List<String> partColNames, FileSystem fs,
ConcurrentLinkedQueue<PathDepthInfo> basePaths) {
this.partColNames = partColNames;
this.pd = pd;
this.fs = fs;
this.pendingPaths = basePaths;
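      // Honor HIVE_MSCK_PATH_VALIDATION: "throw" aborts the check on a
      // malformed partition path; any other value only logs a warning.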
this.throwException = "throw"
.equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION));
}
@Override
public Path call() throws Exception {
return processPathDepthInfo(pd);
}
private Path processPathDepthInfo(final PathDepthInfo pd)
throws IOException, HiveException, InterruptedException {
final Path currentPath = pd.p;
final int currentDepth = pd.depth;
FileStatus[] fileStatuses = fs.listStatus(currentPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
      // An empty listing at the table base path (depth 0) is acceptable: the
      // table may simply have no partitions yet. An empty directory at an
      // intermediate depth, however, means partition columns are missing.
      if (fileStatuses.length == 0 && currentDepth > 0 && currentDepth < partColNames.size()) {
        // the last partition-column level has not been reached, so partition
        // columns are missing under currentPath
logOrThrowExceptionWithMsg(
"MSCK is missing partition columns under " + currentPath.toString());
} else {
// found files under currentPath add them to the queue if it is a directory
for (FileStatus fileStatus : fileStatuses) {
if (!fileStatus.isDirectory() && currentDepth < partColNames.size()) {
// found a file at depth which is less than number of partition keys
logOrThrowExceptionWithMsg(
"MSCK finds a file rather than a directory when it searches for "
+ fileStatus.getPath().toString());
} else if (fileStatus.isDirectory() && currentDepth < partColNames.size()) {
// found a sub-directory at a depth less than number of partition keys
// validate if the partition directory name matches with the corresponding
// partition colName at currentDepth
Path nextPath = fileStatus.getPath();
String[] parts = nextPath.getName().split("=");
if (parts.length != 2) {
logOrThrowExceptionWithMsg("Invalid partition name " + nextPath);
} else if (!parts[0].equalsIgnoreCase(partColNames.get(currentDepth))) {
logOrThrowExceptionWithMsg(
"Unexpected partition key " + parts[0] + " found at " + nextPath);
} else {
// add sub-directory to the work queue if maxDepth is not yet reached
pendingPaths.add(new PathDepthInfo(nextPath, currentDepth + 1));
}
}
}
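        // We have descended one level per partition column, so currentPath
        // is a complete partition directory; report it to the caller.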
if (currentDepth == partColNames.size()) {
return currentPath;
}
}
return null;
}
private void logOrThrowExceptionWithMsg(String msg) throws HiveException {
if(throwException) {
throw new HiveException(msg);
} else {
LOG.warn(msg);
}
}
}
private static class PathDepthInfo {
private final Path p;
private final int depth;
PathDepthInfo(Path p, int depth) {
this.p = p;
this.depth = depth;
}
}
private void checkPartitionDirs(final ExecutorService executor,
final Path basePath, final Set<Path> result,
final FileSystem fs, final List<String> partColNames) throws HiveException {
try {
Queue<Future<Path>> futures = new LinkedList<Future<Path>>();
ConcurrentLinkedQueue<PathDepthInfo> nextLevel = new ConcurrentLinkedQueue<>();
nextLevel.add(new PathDepthInfo(basePath, 0));
      //Uses a level-parallel BFS. A recursive DFS implementation can run out
      //of threads when sub-directories are nested more deeply than the pool
      //size. Two queues (current level and next level) are also simpler than
      //a single shared queue, which would need notify()/wait() machinery to
      //tell idle workers when a new level is discovered; such mechanisms are
      //easy to get wrong.
while(!nextLevel.isEmpty()) {
ConcurrentLinkedQueue<PathDepthInfo> tempQueue = new ConcurrentLinkedQueue<>();
//process each level in parallel
while(!nextLevel.isEmpty()) {
futures.add(
executor.submit(new PathDepthInfoCallable(nextLevel.poll(), partColNames, fs, tempQueue)));
}
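        //drain this level's futures; a non-null result is a fully-resolved
        //partition directory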
while(!futures.isEmpty()) {
Path p = futures.poll().get();
if (p != null) {
result.add(p);
}
}
        //move to the next level: the sub-directories newly discovered above
nextLevel = tempQueue;
}
} catch (InterruptedException | ExecutionException e) {
      LOG.error(e.getMessage(), e);
executor.shutdownNow();
throw new HiveException(e.getCause());
}
}
}