/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.metadata;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;

import com.google.common.collect.Sets;

import org.apache.hadoop.hive.common.StringInternUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult;
import org.apache.thrift.TException;

import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * Verify that the information in the metastore matches what is on the
 * filesystem. Fills the passed-in CheckResult object with lists of missing
 * and unexpected tables and partitions.
 */
public class HiveMetaStoreChecker {

  public static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreChecker.class);
  public static final String CLASS_NAME = HiveMetaStoreChecker.class.getName();

  private final Hive hive;
  private final HiveConf conf;

  public HiveMetaStoreChecker(Hive hive) {
    this.hive = hive;
    conf = hive.getConf();
  }
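
  // Typical usage, as an illustrative sketch only: callers construct the
  // checker around a live Hive client and pass in a CheckResult to be filled.
  // The database and table names below are hypothetical.
  //
  //   HiveMetaStoreChecker checker = new HiveMetaStoreChecker(Hive.get(conf));
  //   CheckResult result = new CheckResult();
  //   checker.checkMetastore("default", "web_logs", null, result);
  //   // result.getPartitionsNotInMs(), result.getTablesNotOnFs(), etc. now
  //   // describe any inconsistencies that were found.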

  /**
   * Check the metastore for inconsistencies, data missing in either the
   * metastore or on the dfs.
   *
   * @param dbName
   *          name of the database, if not specified the default will be used.
   * @param tableName
   *          Table we want to run the check for. If null we'll check all the
   *          tables in the database.
   * @param partitions
   *          List of partition name/value pairs, if null or empty check all
   *          partitions
   * @param result
   *          Fill this with the results of the check
   * @throws HiveException
   *           Failed to get required information from the metastore.
   * @throws IOException
   *           Most likely filesystem related
   */
  public void checkMetastore(String dbName, String tableName,
      List<? extends Map<String, String>> partitions, CheckResult result)
      throws HiveException, IOException {

    if (dbName == null || "".equalsIgnoreCase(dbName)) {
      dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
    }

    try {
      if (tableName == null || "".equals(tableName)) {
        // no table specified, check all tables and all partitions.
        List<String> tables = hive.getTablesForDb(dbName, ".*");
        for (String currentTableName : tables) {
          checkTable(dbName, currentTableName, null, result);
        }

        findUnknownTables(dbName, tables, result);
      } else if (partitions == null || partitions.isEmpty()) {
        // only one table, let's check all partitions
        checkTable(dbName, tableName, null, result);
      } else {
        // check the specified partitions
        checkTable(dbName, tableName, partitions, result);
      }
      LOG.info("Number of partitionsNotInMs=" + result.getPartitionsNotInMs().size()
          + ", partitionsNotOnFs=" + result.getPartitionsNotOnFs().size()
          + ", tablesNotInMs=" + result.getTablesNotInMs().size()
          + ", tablesNotOnFs=" + result.getTablesNotOnFs().size());
    } catch (MetaException e) {
      throw new HiveException(e);
    } catch (TException e) {
      throw new HiveException(e);
    }
  }

  /**
   * Check for table directories that aren't in the metastore.
   *
   * @param dbName
   *          Name of the database
   * @param tables
   *          List of table names
   * @param result
   *          Add any found tables to this
   * @throws HiveException
   *           Failed to get required information from the metastore.
   * @throws IOException
   *           Most likely filesystem related
   * @throws MetaException
   *           Failed to get required information from the metastore.
   * @throws NoSuchObjectException
   *           Failed to get required information from the metastore.
   * @throws TException
   *           Thrift communication error.
   */
  void findUnknownTables(String dbName, List<String> tables, CheckResult result)
      throws IOException, MetaException, TException, HiveException {

    Set<Path> dbPaths = new HashSet<Path>();
    Set<String> tableNames = new HashSet<String>(tables);

    for (String tableName : tables) {
      Table table = hive.getTable(dbName, tableName);
      // hack, instead figure out a way to get the db paths
      String isExternal = table.getParameters().get("EXTERNAL");
      if (isExternal == null || !"TRUE".equalsIgnoreCase(isExternal)) {
        dbPaths.add(table.getPath().getParent());
      }
    }

    for (Path dbPath : dbPaths) {
      FileSystem fs = dbPath.getFileSystem(conf);
      FileStatus[] statuses = fs.listStatus(dbPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
      for (FileStatus status : statuses) {
        if (status.isDirectory() && !tableNames.contains(status.getPath().getName())) {
          result.getTablesNotInMs().add(status.getPath().getName());
        }
      }
    }
  }
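
  // The partition specs accepted by checkTable below are column-name -> value
  // maps. A sketch for a hypothetical table partitioned by (ds, hr):
  //
  //   Map<String, String> spec = new HashMap<>();
  //   spec.put("ds", "2008-01-01");
  //   spec.put("hr", "12");
  //   checker.checkTable("default", "web_logs", Collections.singletonList(spec), result);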

  /**
   * Check the metastore for inconsistencies, data missing in either the
   * metastore or on the dfs.
   *
   * @param dbName
   *          Name of the database
   * @param tableName
   *          Name of the table
   * @param partitions
   *          Partitions to check, if null or empty get all the partitions.
   * @param result
   *          Result object
   * @throws HiveException
   *           Failed to get required information from the metastore.
   * @throws IOException
   *           Most likely filesystem related
   * @throws MetaException
   *           Failed to get required information from the metastore.
   */
  void checkTable(String dbName, String tableName,
      List<? extends Map<String, String>> partitions, CheckResult result)
      throws MetaException, IOException, HiveException {

    Table table = null;

    try {
      table = hive.getTable(dbName, tableName);
    } catch (HiveException e) {
      result.getTablesNotInMs().add(tableName);
      return;
    }

    PartitionIterable parts;
    boolean findUnknownPartitions = true;

    if (table.isPartitioned()) {
      if (partitions == null || partitions.isEmpty()) {
        String mode = HiveConf.getVar(conf, ConfVars.HIVEMAPREDMODE, (String) null);
        if ("strict".equalsIgnoreCase(mode)) {
          parts = new PartitionIterable(hive, table, null,
              conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
        } else {
          List<Partition> loadedPartitions = new ArrayList<>();
          PerfLogger perfLogger = SessionState.getPerfLogger();
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
          loadedPartitions.addAll(hive.getAllPartitionsOf(table));
          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
          parts = new PartitionIterable(loadedPartitions);
        }
      } else {
        // we're interested in specific partitions, don't check for any others
        findUnknownPartitions = false;
        List<Partition> loadedPartitions = new ArrayList<>();
        for (Map<String, String> map : partitions) {
          Partition part = hive.getPartition(table, map, false);
          if (part == null) {
            PartitionResult pr = new PartitionResult();
            pr.setTableName(tableName);
            pr.setPartitionName(Warehouse.makePartPath(map));
            result.getPartitionsNotInMs().add(pr);
          } else {
            loadedPartitions.add(part);
          }
        }
        parts = new PartitionIterable(loadedPartitions);
      }
    } else {
      parts = new PartitionIterable(Collections.<Partition>emptyList());
    }

    checkTable(table, parts, findUnknownPartitions, result);
  }

  /**
   * Check the metastore for inconsistencies, data missing in either the
   * metastore or on the dfs.
   *
   * @param table
   *          Table to check
   * @param parts
   *          Partitions to check
   * @param result
   *          Result object
   * @param findUnknownPartitions
   *          Should we try to find unknown partitions?
   * @throws IOException
   *           Could not get information from filesystem
   * @throws HiveException
   *           Could not create Partition object
   */
  void checkTable(Table table, PartitionIterable parts,
      boolean findUnknownPartitions, CheckResult result)
      throws IOException, HiveException {

    Path tablePath = table.getPath();
    FileSystem fs = tablePath.getFileSystem(conf);
    if (!fs.exists(tablePath)) {
      result.getTablesNotOnFs().add(table.getTableName());
      return;
    }

    Set<Path> partPaths = new HashSet<Path>();

    // check that the partition folders exist on disk
    for (Partition partition : parts) {
      if (partition == null) {
        // most likely the user specified an invalid partition
        continue;
      }
      Path partPath = partition.getDataLocation();
      fs = partPath.getFileSystem(conf);
      if (!fs.exists(partPath)) {
        PartitionResult pr = new PartitionResult();
        pr.setPartitionName(partition.getName());
        pr.setTableName(partition.getTable().getTableName());
        result.getPartitionsNotOnFs().add(pr);
      }

      // remember the partition path and its ancestors, one level per partition
      // column, so known directories can be subtracted from the scan later
      for (int i = 0; i < partition.getSpec().size(); i++) {
        Path qualifiedPath = partPath.makeQualified(fs);
        StringInternUtils.internUriStringsInPath(qualifiedPath);
        partPaths.add(qualifiedPath);
        partPath = partPath.getParent();
      }
    }

    if (findUnknownPartitions) {
      findUnknownPartitions(table, partPaths, result);
    }
  }
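
  // Illustrative sketch of the ancestor bookkeeping above, with hypothetical
  // paths: for a table at /warehouse/t partitioned by (ds, hr), a known
  // partition stored at
  //   /warehouse/t/ds=2008-01-01/hr=12
  // contributes both that path and /warehouse/t/ds=2008-01-01 to partPaths.
  // Everything in partPaths is subtracted from the directories discovered on
  // disk before findUnknownPartitions reports anything as missing from the
  // metastore.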

  /**
   * Find partitions on the fs that are unknown to the metastore.
   *
   * @param table
   *          Table where the partitions would be located
   * @param partPaths
   *          Paths of the partitions the ms knows about
   * @param result
   *          Result object
   * @throws IOException
   *           Thrown if we fail at fetching listings from the fs.
   * @throws HiveException
   */
  void findUnknownPartitions(Table table, Set<Path> partPaths,
      CheckResult result) throws IOException, HiveException {

    Path tablePath = table.getPath();
    // now check the table folder and see if we find anything
    // that isn't in the metastore
    Set<Path> allPartDirs = new HashSet<Path>();
    checkPartitionDirs(tablePath, allPartDirs,
        Collections.unmodifiableList(table.getPartColNames()));
    // don't want the table dir
    allPartDirs.remove(tablePath);

    // remove the partition paths we know about
    allPartDirs.removeAll(partPaths);

    Set<String> partColNames = Sets.newHashSet();
    for (FieldSchema fSchema : table.getPartCols()) {
      partColNames.add(fSchema.getName());
    }

    // we should now only have the unexpected folders left
    for (Path partPath : allPartDirs) {
      FileSystem fs = partPath.getFileSystem(conf);
      String partitionName = getPartitionName(fs.makeQualified(tablePath),
          partPath, partColNames);
      LOG.debug("PartitionName: " + partitionName);

      if (partitionName != null) {
        PartitionResult pr = new PartitionResult();
        pr.setPartitionName(partitionName);
        pr.setTableName(table.getTableName());

        result.getPartitionsNotInMs().add(pr);
      }
    }
    LOG.debug("Number of partitions not in metastore : "
        + result.getPartitionsNotInMs().size());
  }

  /**
   * Get the partition name from the path.
   *
   * @param tablePath
   *          Path of the table.
   * @param partitionPath
   *          Path of the partition.
   * @param partCols
   *          Set of partition columns from table definition
   * @return Partition name, for example partitiondate=2008-01-01
   */
  static String getPartitionName(Path tablePath, Path partitionPath,
      Set<String> partCols) {
    String result = null;
    Path currPath = partitionPath;
    LOG.debug("tablePath: " + tablePath + ", partCols: " + partCols);

    while (currPath != null && !tablePath.equals(currPath)) {
      // format: partition=p_val
      // add a segment only when its name matches a partition column of the table
      String[] parts = currPath.getName().split("=");
      if (parts != null && parts.length > 0) {
        if (parts.length != 2) {
          LOG.warn(currPath.getName() + " is not a valid partition name");
          return result;
        }

        String partitionName = parts[0];
        if (partCols.contains(partitionName)) {
          if (result == null) {
            result = currPath.getName();
          } else {
            result = currPath.getName() + Path.SEPARATOR + result;
          }
        }
      }
      currPath = currPath.getParent();
      LOG.debug("currPath=" + currPath);
    }
    return result;
  }
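
  // Worked example for getPartitionName, with hypothetical paths: given
  //   tablePath     = /warehouse/t
  //   partitionPath = /warehouse/t/ds=2008-01-01/hr=12
  //   partCols      = {ds, hr}
  // the method walks upward from hr=12, prepending each matching segment, and
  // returns "ds=2008-01-01/hr=12". A segment that is not of the form name=value
  // (e.g. a stray "data" directory) triggers a warning and returns whatever has
  // been accumulated so far.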

  /**
   * Assume that depth is 2, i.e., partition columns are a and b:
   *   tblPath/a=1              =&gt; throw exception
   *   tblPath/a=1/file         =&gt; throw exception
   *   tblPath/a=1/b=2/file     =&gt; return a=1/b=2
   *   tblPath/a=1/b=2/c=3      =&gt; return a=1/b=2
   *   tblPath/a=1/b=2/c=3/file =&gt; return a=1/b=2
   *
   * @param basePath
   *          Start directory
   * @param allDirs
   *          This set will contain the leaf paths at the end.
   * @param partColNames
   *          Partition column names; their number determines how deep the
   *          search goes.
   * @throws IOException
   *           Thrown if we can't get lists from the fs.
   * @throws HiveException
   */
  private void checkPartitionDirs(Path basePath, Set<Path> allDirs,
      final List<String> partColNames) throws IOException, HiveException {
    // Here we just reuse the THREAD_COUNT configuration for
    // METASTORE_FS_HANDLER_THREADS_COUNT since this results in better performance.
    // The missing partitions discovered here are later added by the metastore
    // using a thread pool of size METASTORE_FS_HANDLER_THREADS_COUNT; if the two
    // pools differ in size, the smaller one becomes the bottleneck.
    int poolSize = conf.getInt(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT.varname, 15);

    ExecutorService executor;
    if (poolSize <= 1) {
      LOG.debug("Using single-threaded version of MSCK-GetPaths");
      executor = MoreExecutors.sameThreadExecutor();
    } else {
      LOG.debug("Using multi-threaded version of MSCK-GetPaths with number of threads "
          + poolSize);
      ThreadFactory threadFactory = new ThreadFactoryBuilder()
          .setDaemon(true).setNameFormat("MSCK-GetPaths-%d").build();
      executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(poolSize, threadFactory);
    }
    checkPartitionDirs(executor, basePath, allDirs, basePath.getFileSystem(conf), partColNames);

    executor.shutdown();
  }

  private final class PathDepthInfoCallable implements Callable<Path> {
    private final List<String> partColNames;
    private final FileSystem fs;
    private final ConcurrentLinkedQueue<PathDepthInfo> pendingPaths;
    private final boolean throwException;
    private final PathDepthInfo pd;

    private PathDepthInfoCallable(PathDepthInfo pd, List<String> partColNames,
        FileSystem fs, ConcurrentLinkedQueue<PathDepthInfo> basePaths) {
      this.partColNames = partColNames;
      this.pd = pd;
      this.fs = fs;
      this.pendingPaths = basePaths;
      this.throwException = "throw"
          .equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION));
    }

    @Override
    public Path call() throws Exception {
      return processPathDepthInfo(pd);
    }

    private Path processPathDepthInfo(final PathDepthInfo pd)
        throws IOException, HiveException, InterruptedException {
      final Path currentPath = pd.p;
      final int currentDepth = pd.depth;
      FileStatus[] fileStatuses = fs.listStatus(currentPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
      // found no files under a sub-directory under table base path; it is possible
      // that the table is empty and hence there are no partition sub-directories
      // created under base path
      if (fileStatuses.length == 0 && currentDepth > 0 && currentDepth < partColNames.size()) {
        // since maxDepth is not yet reached, we are missing partition
        // columns in currentPath
        logOrThrowExceptionWithMsg(
            "MSCK is missing partition columns under " + currentPath.toString());
      } else {
        // found files under currentPath; add them to the queue if they are directories
        for (FileStatus fileStatus : fileStatuses) {
          if (!fileStatus.isDirectory() && currentDepth < partColNames.size()) {
            // found a file at a depth which is less than the number of partition keys
            logOrThrowExceptionWithMsg(
                "MSCK finds a file rather than a directory when it searches for "
                    + fileStatus.getPath().toString());
          } else if (fileStatus.isDirectory() && currentDepth < partColNames.size()) {
            // found a sub-directory at a depth less than the number of partition keys;
            // validate that the partition directory name matches the corresponding
            // partition colName at currentDepth
            Path nextPath = fileStatus.getPath();
            String[] parts = nextPath.getName().split("=");
            if (parts.length != 2) {
              logOrThrowExceptionWithMsg("Invalid partition name " + nextPath);
            } else if (!parts[0].equalsIgnoreCase(partColNames.get(currentDepth))) {
              logOrThrowExceptionWithMsg(
                  "Unexpected partition key " + parts[0] + " found at " + nextPath);
            } else {
              // add sub-directory to the work queue since maxDepth is not yet reached
              pendingPaths.add(new PathDepthInfo(nextPath, currentDepth + 1));
            }
          }
        }
        if (currentDepth == partColNames.size()) {
          return currentPath;
        }
      }
      return null;
    }

    private void logOrThrowExceptionWithMsg(String msg) throws HiveException {
      if (throwException) {
        throw new HiveException(msg);
      } else {
        LOG.warn(msg);
      }
    }
  }
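
  // Sketch of the level-order (BFS) traversal driven by the method below, for a
  // hypothetical table partitioned by (ds, hr):
  //
  //   level 0: { tblPath }                      -> workers enqueue ds=* dirs
  //   level 1: { tblPath/ds=1, tblPath/ds=2 }   -> workers enqueue hr=* dirs
  //   level 2: { tblPath/ds=1/hr=0, ... }       -> depth == #partCols, so these
  //                                                are returned as leaf dirs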

  private static class PathDepthInfo {
    private final Path p;
    private final int depth;

    PathDepthInfo(Path p, int depth) {
      this.p = p;
      this.depth = depth;
    }
  }

  private void checkPartitionDirs(final ExecutorService executor,
      final Path basePath, final Set<Path> result, final FileSystem fs,
      final List<String> partColNames) throws HiveException {
    try {
      Queue<Future<Path>> futures = new LinkedList<Future<Path>>();
      ConcurrentLinkedQueue<PathDepthInfo> nextLevel = new ConcurrentLinkedQueue<>();
      nextLevel.add(new PathDepthInfo(basePath, 0));
      // Uses a level-parallel implementation of BFS. Recursive DFS implementations
      // have an issue where they can run out of threads when the number of nested
      // sub-directories exceeds the pool size.
      // Using two queues is also simpler than one: a single-queue approach would
      // need extra machinery (e.g. notify()/wait()) to let idle worker threads
      // know when new levels are discovered, which is easy to get wrong.
      while (!nextLevel.isEmpty()) {
        ConcurrentLinkedQueue<PathDepthInfo> tempQueue = new ConcurrentLinkedQueue<>();
        // process each level in parallel
        while (!nextLevel.isEmpty()) {
          futures.add(executor.submit(
              new PathDepthInfoCallable(nextLevel.poll(), partColNames, fs, tempQueue)));
        }
        while (!futures.isEmpty()) {
          Path p = futures.poll().get();
          if (p != null) {
            result.add(p);
          }
        }
        // update nextLevel with the newly discovered sub-directories from above
        nextLevel = tempQueue;
      }
    } catch (InterruptedException | ExecutionException e) {
      LOG.error(e.getMessage());
      executor.shutdownNow();
      throw new HiveException(e.getCause());
    }
  }
}