/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.txn.compactor;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.JobConf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.security.PrivilegedExceptionAction;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* A class to do compactions. This will run in a separate thread. It will spin on the
* compaction queue and look for new work to do.
*/
public class Worker extends CompactorThread {
static final private String CLASS_NAME = Worker.class.getName();
static final private Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
static final private long SLEEP_TIME = 5000;
static final private int baseThreadNum = 10002;
private String name;
private JobConf mrJob; // the MR job for compaction
/**
* Get the hostname that this worker is run on. Made static and public so that other classes
* can use the same method to know what host their worker threads are running on.
* @return hostname
*/
public static String hostname() {
try {
return InetAddress.getLocalHost().getHostName();
} catch (UnknownHostException e) {
LOG.error("Unable to resolve my host name " + e.getMessage());
throw new RuntimeException(e);
}
}
  //todo: this doesn't check whether a compaction is already running (the Initiator does, but we
  // don't go through the Initiator for user-initiated compactions)
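  /*
   * High-level flow of run(): pull the next queued compaction from the metastore, resolve its
   * table, partition and storage descriptor, snapshot the valid transaction list, launch a
   * CompactorMR job (as the process user or as a proxy for the table owner), and finally mark
   * the queue entry compacted or failed.
   */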
@Override
public void run() {
do {
boolean launchedJob = false;
// Make sure nothing escapes this run method and kills the metastore at large,
// so wrap it in a big catch Throwable statement.
try {
final CompactionInfo ci = txnHandler.findNextToCompact(name);
        if (ci == null) {
          if (!stop.get()) {
            try {
              Thread.sleep(SLEEP_TIME);
            } catch (InterruptedException e) {
              LOG.warn("Worker thread sleep interrupted " + e.getMessage());
            }
          }
          continue;
        }
// Find the table we will be working with.
Table t1 = null;
try {
t1 = resolveTable(ci);
if (t1 == null) {
LOG.info("Unable to find table " + ci.getFullTableName() +
", assuming it was dropped and moving on.");
txnHandler.markCleaned(ci);
continue;
}
        } catch (MetaException e) {
          LOG.error("Unexpected error resolving table " + ci.getFullTableName() +
              ", marking compaction entry as cleaned: " + StringUtils.stringifyException(e));
          txnHandler.markCleaned(ci);
          continue;
        }
// This chicanery is to get around the fact that the table needs to be final in order to
// go into the doAs below.
final Table t = t1;
// Find the partition we will be working with, if there is one.
Partition p = null;
try {
p = resolvePartition(ci);
if (p == null && ci.partName != null) {
LOG.info("Unable to find partition " + ci.getFullPartitionName() +
", assuming it was dropped and moving on.");
txnHandler.markCleaned(ci);
continue;
}
        } catch (Exception e) {
          LOG.error("Unexpected error resolving partition " + ci.getFullPartitionName() +
              ", marking compaction entry as cleaned: " + StringUtils.stringifyException(e));
          txnHandler.markCleaned(ci);
          continue;
        }
// Find the appropriate storage descriptor
final StorageDescriptor sd = resolveStorageDescriptor(t, p);
// Check that the table or partition isn't sorted, as we don't yet support that.
if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
LOG.error("Attempt to compact sorted table, which is not yet supported!");
txnHandler.markCleaned(ci);
continue;
}
final boolean isMajor = ci.isMajorCompaction();
final ValidTxnList txns =
TxnUtils.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
LOG.debug("ValidCompactTxnList: " + txns.writeToString());
txnHandler.setCompactionHighestTxnId(ci, txns.getHighWatermark());
final StringBuilder jobName = new StringBuilder(name);
jobName.append("-compactor-");
jobName.append(ci.getFullPartitionName());
// Determine who to run as
String runAs;
if (ci.runAs == null) {
runAs = findUserToRunAs(sd.getLocation(), t);
txnHandler.setRunAs(ci.id, runAs);
} else {
runAs = ci.runAs;
}
LOG.info("Starting " + ci.type.toString() + " compaction for " +
ci.getFullPartitionName());
final StatsUpdater su = StatsUpdater.init(ci, txnHandler.findColumnsWithStats(ci), conf,
runJobAsSelf(runAs) ? runAs : t.getOwner());
final CompactorMR mr = new CompactorMR();
launchedJob = true;
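        // Launch the compaction job, either in-process (when the resolved user is the process
        // user) or inside a doAs block as a proxy for the table owner.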
try {
if (runJobAsSelf(runAs)) {
mr.run(conf, jobName.toString(), t, sd, txns, ci, su, txnHandler);
} else {
UserGroupInformation ugi = UserGroupInformation.createProxyUser(t.getOwner(),
UserGroupInformation.getLoginUser());
ugi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
mr.run(conf, jobName.toString(), t, sd, txns, ci, su, txnHandler);
return null;
}
});
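            // Close FileSystem instances cached for the proxy UGI so handles do not leak across
            // compactions.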
try {
FileSystem.closeAllForUGI(ugi);
} catch (IOException exception) {
LOG.error("Could not clean up file-system handles for UGI: " + ugi + " for " +
ci.getFullPartitionName(), exception);
}
}
txnHandler.markCompacted(ci);
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST)) {
mrJob = mr.getMrJob();
}
} catch (Exception e) {
LOG.error("Caught exception while trying to compact " + ci +
". Marking failed to avoid repeated failures, " + StringUtils.stringifyException(e));
txnHandler.markFailed(ci);
}
} catch (Throwable t) {
LOG.error("Caught an exception in the main loop of compactor worker " + name + ", " +
StringUtils.stringifyException(t));
}
// If we didn't try to launch a job it either means there was no work to do or we got
// here as the result of a communication failure with the DB. Either way we want to wait
// a bit before we restart the loop.
if (!launchedJob && !stop.get()) {
try {
Thread.sleep(SLEEP_TIME);
} catch (InterruptedException e) {
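          // Ignored; the loop condition re-checks the stop flag.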
}
}
} while (!stop.get());
}
@Override
public void init(AtomicBoolean stop, AtomicBoolean looped) throws MetaException {
super.init(stop, looped);
StringBuilder name = new StringBuilder(hostname());
name.append("-");
name.append(getId());
this.name = name.toString();
setName(name.toString());
}
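  /**
   * Returns the MR job of the most recently launched compaction. Only populated when
   * HiveConf.ConfVars.HIVE_IN_TEST is set, so this is primarily useful to tests.
   */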
public JobConf getMrJob() {
return mrJob;
}
static final class StatsUpdater {
static final private Logger LOG = LoggerFactory.getLogger(StatsUpdater.class);
public static StatsUpdater init(CompactionInfo ci, List<String> columnListForStats,
HiveConf conf, String userName) {
return new StatsUpdater(ci, columnListForStats, conf, userName);
}
/**
     * List of columns for which to compute stats. This may be empty, which means no stats
     * gathering is needed.
*/
private final List<String> columnList;
private final HiveConf conf;
private final String userName;
private final CompactionInfo ci;
private StatsUpdater(CompactionInfo ci, List<String> columnListForStats,
HiveConf conf, String userName) {
this.conf = conf;
this.userName = userName;
this.ci = ci;
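      // Stats are only refreshed after major compactions, and only for columns that already have
      // stats recorded; otherwise keep an empty list so gatherStats() is a no-op.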
if(!ci.isMajorCompaction() || columnListForStats == null || columnListForStats.isEmpty()) {
columnList = Collections.emptyList();
return;
}
columnList = columnListForStats;
}
/**
* todo: what should this do on failure? Should it rethrow? Invalidate stats?
*/
void gatherStats() throws IOException {
if(!ci.isMajorCompaction()) {
return;
}
if(columnList.isEmpty()) {
LOG.debug("No existing stats for " + ci.dbname + "." + ci.tableName + " found. Will not run analyze.");
return;//nothing to do
}
      //e.g. analyze table page_view partition(dt='10/15/2014',country='US')
// compute statistics for columns viewtime
StringBuilder sb = new StringBuilder("analyze table ").append(ci.dbname).append(".").append(ci.tableName);
if(ci.partName != null) {
try {
sb.append(" partition(");
Map<String, String> partitionColumnValues = Warehouse.makeEscSpecFromName(ci.partName);
          for(Map.Entry<String, String> ent : partitionColumnValues.entrySet()) {
            sb.append(ent.getKey()).append("='").append(ent.getValue()).append("',");
          }
          sb.setLength(sb.length() - 1);//remove trailing ,
          sb.append(")");
}
catch(MetaException ex) {
throw new IOException(ex);
}
}
sb.append(" compute statistics for columns ");
for(String colName : columnList) {
sb.append(colName).append(",");
}
sb.setLength(sb.length() - 1);//remove trailing ,
LOG.info("running '" + sb.toString() + "'");
Driver d = new Driver(conf, userName);
SessionState localSession = null;
if(SessionState.get() == null) {
localSession = SessionState.start(new SessionState(conf));
}
try {
CommandProcessorResponse cpr = d.run(sb.toString());
if (cpr.getResponseCode() != 0) {
throw new IOException("Could not update stats for table " + ci.getFullTableName() +
(ci.partName == null ? "" : "/" + ci.partName) + " due to: " + cpr);
}
}
catch(CommandNeedRetryException cnre) {
throw new IOException("Could not update stats for table " + ci.getFullTableName() +
(ci.partName == null ? "" : "/" + ci.partName) + " due to: " + cnre.getMessage());
}
finally {
if(localSession != null) {
localSession.close();
}
}
}
}
}