/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.txn.compactor;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreThread;
import org.apache.hadoop.hive.metastore.RawStore;
import org.apache.hadoop.hive.metastore.RawStoreProxy;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Superclass for all threads in the compactor.
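 *
 * <p>Illustrative sketch only: {@code ExampleCompactor} is a hypothetical subclass,
 * not a class in this package, shown merely to suggest how the helpers below compose.
 * The {@code findNextToCompact} call stands in for however a concrete subclass obtains
 * its next queue entry.</p>
 *
 * <pre>{@code
 * class ExampleCompactor extends CompactorThread {
 *   @Override
 *   public void run() {
 *     do {
 *       try {
 *         CompactionInfo ci = txnHandler.findNextToCompact(getName());
 *         if (ci != null) {
 *           Table t = resolveTable(ci);
 *           Partition part = resolvePartition(ci);
 *           StorageDescriptor sd = resolveStorageDescriptor(t, part);
 *           String user = findUserToRunAs(sd.getLocation(), t);
 *           // If runJobAsSelf(user) returns false, the actual compaction work
 *           // would be wrapped in a doAs() for 'user'.
 *         }
 *       } catch (Exception e) {
 *         // log and move on to the next queue entry
 *       }
 *     } while (!stop.get());
 *   }
 * }
 * }</pre>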
 */
abstract class CompactorThread extends Thread implements MetaStoreThread {
  static final private String CLASS_NAME = CompactorThread.class.getName();
  static final private Logger LOG = LoggerFactory.getLogger(CLASS_NAME);

  protected HiveConf conf;
  protected TxnStore txnHandler;
  protected RawStore rs;
  protected int threadId;
  protected AtomicBoolean stop;
  protected AtomicBoolean looped;

  @Override
  public void setHiveConf(HiveConf conf) {
    this.conf = conf;
  }

  @Override
  public void setThreadId(int threadId) {
    this.threadId = threadId;
  }

  @Override
  public void init(AtomicBoolean stop, AtomicBoolean looped) throws MetaException {
    this.stop = stop;
    this.looped = looped;
    setPriority(MIN_PRIORITY);
    setDaemon(true); // this means the process will exit without waiting for this thread

    // Get our own instance of the transaction handler
    txnHandler = TxnUtils.getTxnStore(conf);

    // Get our own connection to the database so we can get table and partition information.
    rs = RawStoreProxy.getProxy(conf, conf,
        conf.getVar(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL), threadId);
  }

  /**
   * Find the table being compacted.
   * @param ci compaction info returned from the compaction queue
   * @return metastore table
   * @throws org.apache.hadoop.hive.metastore.api.MetaException if the table cannot be found.
   */
  protected Table resolveTable(CompactionInfo ci) throws MetaException {
    try {
      return rs.getTable(ci.dbname, ci.tableName);
    } catch (MetaException e) {
      LOG.error("Unable to find table " + ci.getFullTableName() + ", " + e.getMessage());
      throw e;
    }
  }

  /**
   * Get the partition being compacted.
   * @param ci compaction info returned from the compaction queue
   * @return metastore partition, or null if there is no partition in this compaction info
   * @throws Exception if underlying calls throw, or if the partition name resolves to more
   *     than one partition.
   */
  protected Partition resolvePartition(CompactionInfo ci) throws Exception {
    if (ci.partName != null) {
      List<Partition> parts = null;
      try {
        parts = rs.getPartitionsByNames(ci.dbname, ci.tableName,
            Collections.singletonList(ci.partName));
        if (parts == null || parts.size() == 0) {
          // The partition got dropped before we went looking for it.
          return null;
        }
      } catch (Exception e) {
        LOG.error("Unable to find partition " + ci.getFullPartitionName() + ", " +
            e.getMessage());
        throw e;
      }
      if (parts.size() != 1) {
        LOG.error(ci.getFullPartitionName() + " does not refer to a single partition. " + parts);
        throw new MetaException("Too many partitions for : " + ci.getFullPartitionName());
      }
      return parts.get(0);
    } else {
      return null;
    }
  }

  /**
   * Get the storage descriptor for a compaction.
   * @param t table from {@link #resolveTable(org.apache.hadoop.hive.metastore.txn.CompactionInfo)}
   * @param p partition from
   *     {@link #resolvePartition(org.apache.hadoop.hive.metastore.txn.CompactionInfo)}
   * @return metastore storage descriptor.
   */
  protected StorageDescriptor resolveStorageDescriptor(Table t, Partition p) {
    return (p == null) ? t.getSd() : p.getSd();
  }

  /**
   * Determine which user to run an operation as, based on the owner of the directory to be
   * compacted.  It is asserted that either the user running the hive metastore or the table
   * owner must be able to stat the directory and determine the owner.
   * @param location directory that will be read or written to.
   * @param t metastore table object
   * @return username of the owner of the location.
   * @throws java.io.IOException if neither the hive metastore user nor the table owner can stat
   *     the location.
   */
  protected String findUserToRunAs(String location, Table t) throws IOException,
      InterruptedException {
    LOG.debug("Determining who to run the job as.");
    final Path p = new Path(location);
    final FileSystem fs = p.getFileSystem(conf);
    try {
      FileStatus stat = fs.getFileStatus(p);
      LOG.debug("Running job as " + stat.getOwner());
      return stat.getOwner();
    } catch (AccessControlException e) {
      // TODO not sure this is the right exception
      LOG.debug("Unable to stat file as current user, trying as table owner");

      // Now, try it as the table owner and see if we get better luck.
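      // Run the stat as the table owner via a proxy UGI.  The single-element list is just a
      // mutable holder so the anonymous PrivilegedExceptionAction can pass the owner name back
      // out of doAs(); locals captured by an anonymous inner class must be (effectively) final.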
      final List<String> wrapper = new ArrayList<String>(1);
      UserGroupInformation ugi = UserGroupInformation.createProxyUser(t.getOwner(),
          UserGroupInformation.getLoginUser());
      ugi.doAs(new PrivilegedExceptionAction<Object>() {
        @Override
        public Object run() throws Exception {
          FileStatus stat = fs.getFileStatus(p);
          wrapper.add(stat.getOwner());
          return null;
        }
      });
      try {
        FileSystem.closeAllForUGI(ugi);
      } catch (IOException exception) {
        LOG.error("Could not clean up file-system handles for UGI: " + ugi, exception);
      }
      if (wrapper.size() == 1) {
        LOG.debug("Running job as " + wrapper.get(0));
        return wrapper.get(0);
      }
    }
    LOG.error("Unable to stat file " + p + " as either current user(" +
        UserGroupInformation.getLoginUser() + ") or table owner(" + t.getOwner() +
        "), giving up");
    throw new IOException("Unable to stat file: " + p);
  }

  /**
   * Determine whether to run this job as the current user or whether we need a doAs to switch
   * users.
   * @param owner of the directory we will be working in, as determined by
   *     {@link #findUserToRunAs(String, org.apache.hadoop.hive.metastore.api.Table)}
   * @return true if the job should run as the current user, false if a doAs is needed.
   */
  protected boolean runJobAsSelf(String owner) {
    return (owner.equals(System.getProperty("user.name")));
  }

  protected String tableName(Table t) {
    return t.getDbName() + "." + t.getTableName();
  }
}