/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.llap.daemon.impl;
import java.lang.reflect.Field;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import java.util.concurrent.RunnableFuture;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.llap.LlapUtil;
import org.apache.hadoop.hive.llap.io.encoded.TezCounterSource;
import org.apache.log4j.MDC;
import org.apache.log4j.NDC;
import org.apache.tez.common.CallableWithNdc;
import org.apache.tez.common.counters.FileSystemCounter;
import org.apache.tez.common.counters.TezCounters;
import org.apache.tez.runtime.task.TaskRunner2Callable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Custom thread pool implementation that records per thread file system statistics in TezCounters.
* The way it works is we capture before and after snapshots of file system thread statistics,
* compute the delta difference in statistics and update them in tez task counters.
*/
public class StatsRecordingThreadPool extends ThreadPoolExecutor {
private static final Logger LOG = LoggerFactory.getLogger(StatsRecordingThreadPool.class);
// uncaught exception handler that will be set for all threads before execution
private Thread.UncaughtExceptionHandler uncaughtExceptionHandler;
public StatsRecordingThreadPool(final int corePoolSize, final int maximumPoolSize,
final long keepAliveTime,
final TimeUnit unit,
final BlockingQueue<Runnable> workQueue,
final ThreadFactory threadFactory) {
this(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory, null);
}
public StatsRecordingThreadPool(final int corePoolSize, final int maximumPoolSize,
final long keepAliveTime,
final TimeUnit unit,
final BlockingQueue<Runnable> workQueue,
final ThreadFactory threadFactory, Thread.UncaughtExceptionHandler handler) {
super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory);
this.uncaughtExceptionHandler = handler;
}
@Override
protected <T> RunnableFuture<T> newTaskFor(final Callable<T> callable) {
return new FutureTask(new WrappedCallable(callable, uncaughtExceptionHandler));
}
public void setUncaughtExceptionHandler(Thread.UncaughtExceptionHandler handler) {
this.uncaughtExceptionHandler = handler;
}
/**
* Callable that wraps the actual callable submitted to the thread pool and invokes completion
* listener in finally block.
*
* @param <V> - actual callable
*/
private static class WrappedCallable<V> implements Callable<V> {
private Callable<V> actualCallable;
private Thread.UncaughtExceptionHandler uncaughtExceptionHandler;
WrappedCallable(final Callable<V> callable,
final Thread.UncaughtExceptionHandler uncaughtExceptionHandler) {
this.actualCallable = callable;
this.uncaughtExceptionHandler = uncaughtExceptionHandler;
}
@Override
public V call() throws Exception {
Thread thread = Thread.currentThread();
// setup uncaught exception handler for the current thread
if (uncaughtExceptionHandler != null) {
thread.setUncaughtExceptionHandler(uncaughtExceptionHandler);
}
// clone thread local file system statistics
List<LlapUtil.StatisticsData> statsBefore = LlapUtil.cloneThreadLocalFileSystemStatistics();
setupMDCFromNDC(actualCallable);
try {
return actualCallable.call();
} finally {
updateFileSystemCounters(statsBefore, actualCallable);
MDC.clear();
}
}
private void setupMDCFromNDC(final Callable<V> actualCallable) {
if (actualCallable instanceof CallableWithNdc) {
CallableWithNdc callableWithNdc = (CallableWithNdc) actualCallable;
try {
// CallableWithNdc inherits from NDC only when call() is invoked. CallableWithNdc has to
// extended to provide access to its ndcStack that is cloned during creation. Until, then
// we will use reflection to access the private field.
// FIXME: HIVE-14243 follow to remove this reflection
Field field = callableWithNdc.getClass().getSuperclass().getDeclaredField("ndcStack");
field.setAccessible(true);
Stack ndcStack = (Stack) field.get(callableWithNdc);
final Stack clonedStack = (Stack) ndcStack.clone();
final String fragmentId = (String) clonedStack.pop();
final String queryId = (String) clonedStack.pop();
final String dagId = (String) clonedStack.pop();
MDC.put("dagId", dagId);
MDC.put("queryId", queryId);
MDC.put("fragmentId", fragmentId);
if (LOG.isDebugEnabled()) {
LOG.debug("Received dagId: {} queryId: {} instanceType: {}",
dagId, queryId, actualCallable.getClass().getSimpleName());
}
} catch (Exception e) {
LOG.warn("Not setting up MDC as NDC stack cannot be accessed reflectively for" +
" instance type: {} exception type: {}",
actualCallable.getClass().getSimpleName(), e.getClass().getSimpleName());
}
} else {
LOG.warn("Not setting up MDC as unknown callable instance type received: {}",
actualCallable.getClass().getSimpleName());
}
}
private void updateFileSystemCounters(final List<LlapUtil.StatisticsData> statsBefore,
final Callable<V> actualCallable) {
Thread thread = Thread.currentThread();
TezCounters tezCounters = null;
// add tez counters for task execution and llap io
if (actualCallable instanceof TaskRunner2Callable) {
TaskRunner2Callable taskRunner2Callable = (TaskRunner2Callable) actualCallable;
// counters for task execution side
tezCounters = taskRunner2Callable.addAndGetTezCounter(FileSystemCounter.class.getName());
} else if (actualCallable instanceof TezCounterSource) {
// Other counter sources (currently used in LLAP IO).
tezCounters = ((TezCounterSource) actualCallable).getTezCounters();
}
if (tezCounters != null) {
if (statsBefore != null) {
// if there are multiple stats for the same scheme (from different NameNode), this
// method will squash them together
Map<String, FileSystem.Statistics> schemeToStats = LlapUtil
.getCombinedFileSystemStatistics();
for (Map.Entry<String, FileSystem.Statistics> entry : schemeToStats.entrySet()) {
final String scheme = entry.getKey();
FileSystem.Statistics statistics = entry.getValue();
FileSystem.Statistics.StatisticsData threadFSStats = statistics
.getThreadStatistics();
List<LlapUtil.StatisticsData> allStatsBefore = LlapUtil
.getStatisticsForScheme(scheme, statsBefore);
long bytesReadDelta = 0;
long bytesWrittenDelta = 0;
long readOpsDelta = 0;
long largeReadOpsDelta = 0;
long writeOpsDelta = 0;
// there could be more scheme after execution as execution might be accessing a
// different filesystem. So if we don't find a matching scheme before execution we
// just use the after execution values directly without computing delta difference
if (allStatsBefore != null && !allStatsBefore.isEmpty()) {
for (LlapUtil.StatisticsData sb : allStatsBefore) {
bytesReadDelta += threadFSStats.getBytesRead() - sb.getBytesRead();
bytesWrittenDelta += threadFSStats.getBytesWritten() - sb.getBytesWritten();
readOpsDelta += threadFSStats.getReadOps() - sb.getReadOps();
largeReadOpsDelta += threadFSStats.getLargeReadOps() - sb.getLargeReadOps();
writeOpsDelta += threadFSStats.getWriteOps() - sb.getWriteOps();
}
} else {
bytesReadDelta = threadFSStats.getBytesRead();
bytesWrittenDelta = threadFSStats.getBytesWritten();
readOpsDelta = threadFSStats.getReadOps();
largeReadOpsDelta = threadFSStats.getLargeReadOps();
writeOpsDelta = threadFSStats.getWriteOps();
}
tezCounters.findCounter(scheme, FileSystemCounter.BYTES_READ)
.increment(bytesReadDelta);
tezCounters.findCounter(scheme, FileSystemCounter.BYTES_WRITTEN)
.increment(bytesWrittenDelta);
tezCounters.findCounter(scheme, FileSystemCounter.READ_OPS).increment(readOpsDelta);
tezCounters.findCounter(scheme, FileSystemCounter.LARGE_READ_OPS)
.increment(largeReadOpsDelta);
tezCounters.findCounter(scheme, FileSystemCounter.WRITE_OPS)
.increment(writeOpsDelta);
if (LOG.isDebugEnabled()) {
LOG.debug("Updated stats: instance: {} thread name: {} thread id: {} scheme: {} " +
"bytesRead: {} bytesWritten: {} readOps: {} largeReadOps: {} writeOps: {}",
actualCallable.getClass().getSimpleName(), thread.getName(), thread.getId(),
scheme, bytesReadDelta, bytesWrittenDelta, readOpsDelta, largeReadOpsDelta,
writeOpsDelta);
}
}
} else {
LOG.warn("File system statistics snapshot before execution of thread is null." +
"Thread name: {} id: {} allStats: {}", thread.getName(), thread.getId(),
statsBefore);
}
} else {
LOG.warn("TezCounters is null for callable type: {}",
actualCallable.getClass().getSimpleName());
}
}
}
}