/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.query.tracker;
import java.io.Closeable;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import com.addthis.basis.util.Parameter;
import com.addthis.hydra.query.aggregate.DetailedStatusTask;
import com.addthis.hydra.query.aggregate.TaskSourceInfo;
import com.addthis.hydra.query.aggregate.TaskSourceOptionInfo;
import com.addthis.hydra.task.output.TaskDataOutput;
import com.addthis.hydra.util.LogUtil;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Counter;
import com.yammer.metrics.core.Gauge;
import com.yammer.metrics.core.Timer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Keeps track of the queries that are currently running and of a bounded set of recently
 * completed ones, and publishes a few metrics about them.
 *
 * <p>Constructing a tracker schedules a {@link TimeoutWatcher} over the running-query map every
 * five seconds on a dedicated single-thread scheduler and opens the "queries" event log.
 * {@link #close()} stops that scheduler and completes the event log.
 *
 * <p>Nothing in this class adds entries to {@link #running} or {@link #recentlyCompleted};
 * presumably other classes in this package do so (both fields are package-private).
 */
public class QueryTracker implements Closeable {

    static final Logger log = LoggerFactory.getLogger(QueryTracker.class);

    /**
     * Upper bound on the number of entries kept in {@link #recentlyCompleted}. Read through
     * {@code Parameter.intValue} with a fallback of 50.
     */
    static final int MAX_FINISHED_CACHE_SIZE = Parameter.intValue("QueryCache.MAX_FINISHED_CACHE_SIZE", 50);

    /**
     * Contains the queries that are running, keyed by the string the callers use to look them
     * up (named {@code uuid} / {@code key} in the accessors below).
     */
    final ConcurrentMap<String, QueryEntry> running = new ConcurrentHashMap<>();

    /** Size-bounded cache of info objects for queries that have finished. */
    final Cache<String, QueryEntryInfo> recentlyCompleted;

    /** Output for query events; created from the "queries" configuration in the constructor. */
    final TaskDataOutput eventLog;

    /* metrics */
    final Counter queryErrors = Metrics.newCounter(QueryTracker.class, "queryErrors");
    final Timer queryMeter = Metrics.newTimer(QueryTracker.class, "queryMeter", TimeUnit.MILLISECONDS, TimeUnit.MINUTES);

    /** Reports the current size of {@link #running}. */
    final Gauge<Integer> runningCount = Metrics.newGauge(QueryTracker.class, "RunningCount", new Gauge<Integer>() {
        @Override
        public Integer value() {
            return running.size();
        }
    });

    /** Reports the number of blocked tasks across all running queries, or -1 if that could not be computed. */
    final Gauge<Integer> blockedTasks = Metrics.newGauge(QueryTracker.class, "blockedTasks", new Gauge<Integer>() {
        @Override
        public Integer value() {
            return blockedTaskStatsWithoutException()[0];
        }
    });

    /**
     * Reports the largest run time among running queries that have at least one blocked task,
     * or -1 if that could not be computed.
     */
    final Gauge<Integer> maxTaskWaitTime = Metrics.newGauge(QueryTracker.class, "maxTaskWaitTime", new Gauge<Integer>() {
        @Override
        public Integer value() {
            return blockedTaskStatsWithoutException()[1];
        }
    });

    /**
     * thread pool for query timeout watcher runs. Should only need one thread.
     */
    private final ScheduledExecutorService timeoutWatcherService = new ScheduledThreadPoolExecutor(1,
            new ThreadFactoryBuilder().setNameFormat("timeoutWatcher=%d").setDaemon(true).build());

    /**
     * Creates the completed-query cache, starts the periodic timeout watcher (first run after
     * five seconds, then five seconds after each run finishes) and opens the event log.
     */
    public QueryTracker() {
        this.recentlyCompleted = CacheBuilder.newBuilder()
                .maximumSize(MAX_FINISHED_CACHE_SIZE)
                .build();
        // start timeoutWatcher
        ScheduledFuture<?> watcherFuture = this.timeoutWatcherService.scheduleWithFixedDelay(
                new TimeoutWatcher(running), 5, 5, TimeUnit.SECONDS);
        checkForErrors(watcherFuture);
        this.eventLog = LogUtil.newBundleOutputFromConfig("queries");
    }

    /**
     * Stops the timeout watcher and completes the event log.
     *
     * <p>The scheduler is shut down with {@code shutdown()}, which cancels the periodic watcher
     * task without interrupting a run that is already in progress. The event log is completed
     * in a {@code finally} block so it is signalled even if stopping the scheduler fails.
     */
    @Override
    public void close() {
        try {
            // Without this the watcher thread (and its reference to the running map) would
            // outlive every closed tracker.
            timeoutWatcherService.shutdown();
        } finally {
            eventLog.sendComplete();
        }
    }

    /**
     * @return the number of entries currently in the running map
     */
    public int getRunningCount() {
        return running.size();
    }

    /**
     * @return a new list holding {@code getStat()} of every entry currently in the running map
     */
    public List<QueryEntryInfo> getRunning() {
        List<QueryEntryInfo> stats = new ArrayList<>(running.size());
        for (QueryEntry entry : running.values()) {
            stats.add(entry.getStat());
        }
        return stats;
    }

    /**
     * @param uuid key of the completed query
     * @return the cached info for that key, or {@code null} if the cache holds none
     */
    public QueryEntryInfo getCompletedQueryInfo(String uuid) {
        return recentlyCompleted.asMap().get(uuid);
    }

    /**
     * Looks a query up among the running queries only.
     *
     * @param uuid key of the query
     * @return the running entry for that key, or {@code null} if no such query is running
     */
    public QueryEntry getQueryEntry(String uuid) {
        return running.get(uuid);
    }

    /**
     * @return a new list holding a snapshot of the values in the completed-query cache
     */
    public List<QueryEntryInfo> getCompleted() {
        return new ArrayList<>(recentlyCompleted.asMap().values());
    }

    /**
     * Asks a running query to cancel.
     *
     * @param key key of the query to cancel
     * @return the result of {@code QueryEntry.cancel()} when the query is running; {@code false}
     *         when {@code key} is null or empty, or when no running query has that key
     */
    public boolean cancelRunning(String key) {
        if ((key == null) || key.isEmpty()) {
            return false;
        }
        QueryEntry entry = running.get(key);
        if (entry == null) {
            log.warn("QT could not get entry from running -- was null : {}", key);
            return false;
        }
        return entry.cancel();
    }

    /**
     * Makes sure the future object doesn't swallow any executor-startup related errors: if the
     * future has already finished, its outcome is retrieved so that a failure is rethrown here.
     * A future that is still pending is left alone.
     *
     * @param future the future returned when the watcher was scheduled
     * @throws IllegalStateException if retrieving the result is interrupted
     * @throws RuntimeException      the failure cause, wrapped if it is not already unchecked
     */
    private static void checkForErrors(ScheduledFuture<?> future) {
        if (!future.isDone()) {
            return;
        }
        try {
            future.get();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new IllegalStateException("either inexplicably had to wait for an already " +
                                            "complete future or thread triggered an unexpected" +
                                            " and pre-existing interrupt condition.", e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            }
            throw new RuntimeException(cause);
        }
    }

    /**
     * Computes blocked-task statistics over all running queries.
     *
     * <p>A task counts as blocked when none of its options is active (a task with no options
     * therefore counts as blocked). A query counts as blocked when at least one of its tasks is.
     *
     * @return a two-element array: index 0 is the total number of blocked tasks, index 1 is the
     *         largest {@code getRunTime()} among blocked queries (0 when there are none),
     *         clamped to {@code Integer.MAX_VALUE}
     */
    private int[] blockedTaskStats() {
        int blocked = 0;
        int maxWaitTime = 0;
        for (QueryEntry entry : this.running.values()) {
            boolean isQueryBlocked = false;
            for (TaskSourceInfo task : DetailedStatusTask.taskSourceInfo(entry.aggregator)) {
                boolean isTaskBlocked = true;
                for (TaskSourceOptionInfo option : task.options) {
                    if (option.active) {
                        isTaskBlocked = false;
                        break;
                    }
                }
                if (isTaskBlocked) {
                    blocked++;
                    isQueryBlocked = true;
                }
            }
            if (isQueryBlocked) {
                // Clamp before narrowing so a very long run time cannot wrap to a bogus value.
                int runTime = (int) Math.min(Integer.MAX_VALUE, entry.getRunTime());
                maxWaitTime = Math.max(maxWaitTime, runTime);
            }
        }
        return new int[]{blocked, maxWaitTime};
    }

    /**
     * Variant of {@link #blockedTaskStats()} used for gauge metrics which cannot throw
     * exceptions.
     *
     * @return the computed statistics, or {@code {-1, -1}} if computing them threw
     */
    private int[] blockedTaskStatsWithoutException() {
        try {
            return this.blockedTaskStats();
        } catch (Exception e) {
            log.info("error computing blocked task metrics: ", e);
            return new int[]{-1, -1};
        }
    }
}