/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.txn.compactor;

import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
 * A class to clean directories after compactions.  This will run in a separate thread.
 */
public class Cleaner extends CompactorThread {
  static final private String CLASS_NAME = Cleaner.class.getName();
  static final private Logger LOG = LoggerFactory.getLogger(CLASS_NAME);

  private long cleanerCheckInterval = 0;

  // List of compactions to clean.
  private Map<Long, Set<Long>> compactId2LockMap = new HashMap<Long, Set<Long>>();
  private Map<Long, CompactionInfo> compactId2CompactInfoMap = new HashMap<Long, CompactionInfo>();

  @Override
  public void run() {
    if (cleanerCheckInterval == 0) {
      cleanerCheckInterval = conf.getTimeVar(
          HiveConf.ConfVars.HIVE_COMPACTOR_CLEANER_RUN_INTERVAL, TimeUnit.MILLISECONDS);
    }

    do {
      // This is solely for testing.  It checks if the test has set the looped value to false,
      // and if so remembers that and then sets it to true at the end.  We have to check here
      // first to make sure we go through a complete iteration of the loop before resetting it.
      boolean setLooped = !looped.get();
      TxnStore.MutexAPI.LockHandle handle = null;
      long startedAt = -1;
      // Make sure nothing escapes this run method and kills the metastore at large,
      // so wrap it in a big catch Throwable statement.
      try {
        handle = txnHandler.getMutexAPI().acquireLock(TxnStore.MUTEX_KEY.Cleaner.name());
        startedAt = System.currentTimeMillis();
        // First look for all the compactions that are waiting to be cleaned.  If we have not
        // seen an entry before, look for all the locks held on that table or partition and
        // record them.  We will then only clean the partition once all of those locks have been
        // released.  This way we avoid removing the files while they are in use,
        // while at the same time avoiding starving the cleaner as new readers come along.
        // This works because we know that any reader who comes along after the worker thread has
        // done the compaction will read the more up to date version of the data (either in a
        // newer delta or in a newer base).
        List<CompactionInfo> toClean = txnHandler.findReadyToClean();
        {
          /**
           * Since there may be more than 1 instance of Cleaner running we may have state info
           * for items which were cleaned by other instances.  Here we remove them.
           *
           * In the long run if we add end_time to compaction_queue, then we can check that
           * hive_locks.acquired_at > compaction_queue.end_time + safety_buffer in which case
           * we know the lock owner is reading files created by this compaction or later.
           * The advantage is that we don't have to store the locks.
           */
          Set<Long> currentToCleanSet = new HashSet<>();
          for (CompactionInfo ci : toClean) {
            currentToCleanSet.add(ci.id);
          }
          Set<Long> cleanPerformedByOthers = new HashSet<>();
          for (long id : compactId2CompactInfoMap.keySet()) {
            if (!currentToCleanSet.contains(id)) {
              cleanPerformedByOthers.add(id);
            }
          }
          for (long id : cleanPerformedByOthers) {
            compactId2CompactInfoMap.remove(id);
            compactId2LockMap.remove(id);
          }
        }
        if (toClean.size() > 0 || compactId2LockMap.size() > 0) {
          ShowLocksResponse locksResponse = txnHandler.showLocks(new ShowLocksRequest());

          for (CompactionInfo ci : toClean) {
            // Check to see if we have seen this request before.  If so, ignore it.  If not,
            // add it to our queue.
            if (!compactId2LockMap.containsKey(ci.id)) {
              compactId2LockMap.put(ci.id, findRelatedLocks(ci, locksResponse));
              compactId2CompactInfoMap.put(ci.id, ci);
            }
          }

          // Now, for each entry in the queue, see if all of the associated locks are clear so we
          // can clean.
          Set<Long> currentLocks = buildCurrentLockSet(locksResponse);
          List<Long> expiredLocks = new ArrayList<Long>();
          List<Long> compactionsCleaned = new ArrayList<Long>();
          try {
            for (Map.Entry<Long, Set<Long>> queueEntry : compactId2LockMap.entrySet()) {
              boolean sawLock = false;
              for (Long lockId : queueEntry.getValue()) {
                if (currentLocks.contains(lockId)) {
                  sawLock = true;
                  break;
                } else {
                  expiredLocks.add(lockId);
                }
              }

              if (!sawLock) {
                // Remember to remove this when we're out of the loop,
                // we can't do it in the loop or we'll get a concurrent modification exception.
                compactionsCleaned.add(queueEntry.getKey());
                // Future thought: this may be expensive so consider having a thread pool run in parallel
                clean(compactId2CompactInfoMap.get(queueEntry.getKey()));
              } else {
                // Remove the locks we didn't see so we don't look for them again next time
                for (Long lockId : expiredLocks) {
                  queueEntry.getValue().remove(lockId);
                }
              }
            }
          } finally {
            if (compactionsCleaned.size() > 0) {
              for (Long compactId : compactionsCleaned) {
                compactId2LockMap.remove(compactId);
                compactId2CompactInfoMap.remove(compactId);
              }
            }
          }
        }
      } catch (Throwable t) {
        LOG.error("Caught an exception in the main loop of compactor cleaner, " +
            StringUtils.stringifyException(t));
      } finally {
        if (handle != null) {
          handle.releaseLocks();
        }
      }
      if (setLooped) {
        looped.set(true);
      }
      // Now, go back to bed until it's time to do this again
      long elapsedTime = System.currentTimeMillis() - startedAt;
      if (elapsedTime >= cleanerCheckInterval || stop.get()) {
        continue;
      } else {
        try {
          Thread.sleep(cleanerCheckInterval - elapsedTime);
        } catch (InterruptedException ie) {
          // What can I do about it?
        }
      }
    } while (!stop.get());
  }

  private Set<Long> findRelatedLocks(CompactionInfo ci, ShowLocksResponse locksResponse) {
    Set<Long> relatedLocks = new HashSet<Long>();
    for (ShowLocksResponseElement lock : locksResponse.getLocks()) {
      if (ci.dbname.equals(lock.getDbname())) {
        if ((ci.tableName == null && lock.getTablename() == null) ||
            (ci.tableName != null && ci.tableName.equals(lock.getTablename()))) {
          if ((ci.partName == null && lock.getPartname() == null) ||
              (ci.partName != null && ci.partName.equals(lock.getPartname()))) {
            relatedLocks.add(lock.getLockid());
          }
        }
      }
    }
    return relatedLocks;
  }

  private Set<Long> buildCurrentLockSet(ShowLocksResponse locksResponse) {
    Set<Long> currentLocks = new HashSet<Long>(locksResponse.getLocks().size());
    for (ShowLocksResponseElement lock : locksResponse.getLocks()) {
      currentLocks.add(lock.getLockid());
    }
    return currentLocks;
  }

  private void clean(CompactionInfo ci) throws MetaException {
    LOG.info("Starting cleaning for " + ci.getFullPartitionName());
    try {
      Table t = resolveTable(ci);
      if (t == null) {
        // The table was dropped before we got around to cleaning it.
        LOG.info("Unable to find table " + ci.getFullTableName() + ", assuming it was dropped");
        txnHandler.markCleaned(ci);
        return;
      }
      Partition p = null;
      if (ci.partName != null) {
        p = resolvePartition(ci);
        if (p == null) {
          // The partition was dropped before we got around to cleaning it.
          LOG.info("Unable to find partition " + ci.getFullPartitionName() +
              ", assuming it was dropped");
          txnHandler.markCleaned(ci);
          return;
        }
      }
      StorageDescriptor sd = resolveStorageDescriptor(t, p);
      final String location = sd.getLocation();

      /**
       * Each Compaction only compacts as far as the highest txn id such that all txns below it
       * are resolved (i.e. not opened).  This is what "highestTxnId" tracks.  This is only tracked
       * since Hive 1.3.0/2.0 - thus may be 0.  See ValidCompactorTxnList and uses for more info.
       *
       * We only want to clean up to the highestTxnId - otherwise we risk deleting deltas from
       * under an active reader.
       *
       * Suppose we have deltas D2 D3 for table T, i.e. the last compaction created D3 so now
       * there is a clean request for D2.
       * Cleaner checks existing locks and finds none.
       * Between that check and removeFiles() a query starts (it will be reading D3) and another
       * compaction completes which creates D4.
       * Now removeFiles() (more specifically AcidUtils.getAcidState()) will declare D3 to be
       * obsolete unless ValidTxnList is "capped" at highestTxnId.
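       * In other words, with the list capped at highestTxnId, getAcidState() does not take D4
       * into account, so D3 is not declared obsolete and the reader that started against D3 is
       * safe; only the data this compaction actually superseded (D2 in the example) gets removed.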
       */
      final ValidTxnList txnList = ci.highestTxnId > 0 ?
          new ValidReadTxnList(new long[0], new BitSet(), ci.highestTxnId) : new ValidReadTxnList();

      if (runJobAsSelf(ci.runAs)) {
        removeFiles(location, txnList);
      } else {
        LOG.info("Cleaning as user " + ci.runAs + " for " + ci.getFullPartitionName());
        UserGroupInformation ugi = UserGroupInformation.createProxyUser(ci.runAs,
            UserGroupInformation.getLoginUser());
        ugi.doAs(new PrivilegedExceptionAction<Object>() {
          @Override
          public Object run() throws Exception {
            removeFiles(location, txnList);
            return null;
          }
        });
        try {
          FileSystem.closeAllForUGI(ugi);
        } catch (IOException exception) {
          LOG.error("Could not clean up file-system handles for UGI: " + ugi + " for " +
              ci.getFullPartitionName(), exception);
        }
      }
      txnHandler.markCleaned(ci);
    } catch (Exception e) {
      LOG.error("Caught exception when cleaning, unable to complete cleaning of " + ci + " " +
          StringUtils.stringifyException(e));
      txnHandler.markFailed(ci);
    }
  }

  private void removeFiles(String location, ValidTxnList txnList) throws IOException {
    AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(location), conf, txnList);
    List<FileStatus> obsoleteDirs = dir.getObsolete();
    List<Path> filesToDelete = new ArrayList<Path>(obsoleteDirs.size());
    for (FileStatus stat : obsoleteDirs) {
      filesToDelete.add(stat.getPath());
    }
    if (filesToDelete.size() < 1) {
      LOG.warn("Hmm, nothing to delete in the cleaner for directory " + location +
          ", that hardly seems right.");
      return;
    }
    LOG.info("About to remove " + filesToDelete.size() + " obsolete directories from " + location);
    FileSystem fs = filesToDelete.get(0).getFileSystem(conf);

    for (Path dead : filesToDelete) {
      LOG.debug("Going to delete path " + dead.toString());
      fs.delete(dead, true);
    }
  }
}