/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.kylin.rest.service; import java.io.IOException; import java.net.InetAddress; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.NavigableSet; import java.util.TreeSet; import java.util.concurrent.ConcurrentMap; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.Pair; import org.apache.kylin.metadata.badquery.BadQueryHistoryManager; import org.apache.kylin.rest.request.SQLRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Maps; public class BadQueryDetector extends Thread { public static final int ONE_MB = 1024 * 1024; private static final Logger logger = LoggerFactory.getLogger(BadQueryDetector.class); private final ConcurrentMap<Thread, Entry> runningQueries = Maps.newConcurrentMap(); private final long detectionInterval; private final int alertMB; private final int alertRunningSec; private KylinConfig kylinConfig; private ArrayList<Notifier> notifiers = new ArrayList<Notifier>(); public BadQueryDetector() { super("BadQueryDetector"); this.setDaemon(true); this.kylinConfig = KylinConfig.getInstanceFromEnv(); this.detectionInterval = kylinConfig.getBadQueryDefaultDetectIntervalSeconds() * 1000L; this.alertMB = 100; this.alertRunningSec = kylinConfig.getBadQueryDefaultAlertingSeconds(); initNotifiers(); } public BadQueryDetector(long detectionInterval, int alertMB, int alertRunningSec) { super("BadQueryDetector"); this.setDaemon(true); this.detectionInterval = detectionInterval; this.alertMB = alertMB; this.alertRunningSec = alertRunningSec; this.kylinConfig = KylinConfig.getInstanceFromEnv(); initNotifiers(); } public static long getSystemAvailBytes() { Runtime runtime = Runtime.getRuntime(); long totalMemory = runtime.totalMemory(); // current heap allocated to the VM process long freeMemory = runtime.freeMemory(); // out of the current heap, how much is free long maxMemory = runtime.maxMemory(); // Max heap VM can use e.g. Xmx setting long usedMemory = totalMemory - freeMemory; // how much of the current heap the VM is using long availableMemory = maxMemory - usedMemory; // available memory i.e. Maximum heap size minus the current amount used return availableMemory; } public static int getSystemAvailMB() { return (int) (getSystemAvailBytes() / ONE_MB); } private void initNotifiers() { this.notifiers.add(new LoggerNotifier()); if (kylinConfig.getBadQueryPersistentEnabled()) { this.notifiers.add(new PersistenceNotifier()); } } public void registerNotifier(Notifier notifier) { notifiers.add(notifier); } private void notify(String adj, float runningSec, long startTime, String project, String sql, String user, Thread t) { for (Notifier notifier : notifiers) { try { notifier.badQueryFound(adj, runningSec, startTime, project, sql, user, t); } catch (Exception e) { logger.error("", e); } } } public void queryStart(Thread thread, SQLRequest sqlRequest, String user) { runningQueries.put(thread, new Entry(sqlRequest, user, thread)); } public void queryEnd(Thread thread) { runningQueries.remove(thread); } public void run() { while (true) { try { Thread.sleep(detectionInterval); } catch (InterruptedException e) { Thread.currentThread().interrupt(); // stop detection and exit return; } try { detectBadQuery(); } catch (Exception ex) { logger.error("", ex); } } } private void detectBadQuery() { long now = System.currentTimeMillis(); ArrayList<Entry> entries = new ArrayList<Entry>(runningQueries.values()); Collections.sort(entries); // report if query running long for (Entry e : entries) { float runningSec = (float) (now - e.startTime) / 1000; if (runningSec >= alertRunningSec) { notify("Slow", runningSec, e.startTime, e.sqlRequest.getProject(), e.sqlRequest.getSql(), e.user, e.thread); dumpStackTrace(e.thread); } else { break; // entries are sorted by startTime } } // report if low memory if (getSystemAvailMB() < alertMB) { logger.info("System free memory less than " + alertMB + " MB. " + entries.size() + " queries running."); } } // log the stack trace of bad query thread for further analysis private void dumpStackTrace(Thread t) { int maxStackTraceDepth = kylinConfig.getBadQueryStackTraceDepth(); int current = 0; StackTraceElement[] stackTrace = t.getStackTrace(); StringBuilder buf = new StringBuilder("Problematic thread 0x" + Long.toHexString(t.getId())); buf.append("\n"); for (StackTraceElement e : stackTrace) { if (++current > maxStackTraceDepth) { break; } buf.append("\t").append("at ").append(e.toString()).append("\n"); } logger.info(buf.toString()); } public interface Notifier { void badQueryFound(String adj, float runningSec, long startTime, String project, String sql, String user, Thread t); } private class LoggerNotifier implements Notifier { @Override public void badQueryFound(String adj, float runningSec, long startTime, String project, String sql, String user, Thread t) { logger.info("{} query has been running {} seconds (project:{}, thread: 0x{}, user:{}) -- {}", adj, runningSec, project, Long.toHexString(t.getId()), user, sql); } } private class PersistenceNotifier implements Notifier { BadQueryHistoryManager badQueryManager = BadQueryHistoryManager.getInstance(kylinConfig); String serverHostname; NavigableSet<Pair<Long, String>> cacheQueue = new TreeSet<>(new Comparator<Pair<Long, String>>() { @Override public int compare(Pair<Long, String> o1, Pair<Long, String> o2) { if (o1.equals(o2)) { return 0; } else if (o1.getFirst().equals(o2.getFirst())) { return o2.getSecond().compareTo(o2.getSecond()); } else { return (int) (o1.getFirst() - o2.getFirst()); } } }); public PersistenceNotifier() { try { serverHostname = InetAddress.getLocalHost().getHostName(); } catch (UnknownHostException e) { serverHostname = "Unknow"; logger.warn("Error in get current hostname.", e); } } @Override public void badQueryFound(String adj, float runningSec, long startTime, String project, String sql, String user, Thread t) { try { long cachingSeconds = (kylinConfig.getBadQueryDefaultAlertingSeconds() + 1) * 30L; Pair<Long, String> sqlPair = new Pair<>(startTime, sql); if (!cacheQueue.contains(sqlPair)) { badQueryManager.addEntryToProject(sql, startTime, adj, runningSec, serverHostname, t.getName(), user, project); cacheQueue.add(sqlPair); while (!cacheQueue.isEmpty() && (System.currentTimeMillis() - cacheQueue.first().getFirst() > cachingSeconds * 1000 || cacheQueue.size() > kylinConfig.getBadQueryHistoryNum() * 3)) { cacheQueue.pollFirst(); } } else { badQueryManager.updateEntryToProject(sql, startTime, adj, runningSec, serverHostname, t.getName(), user, project); } } catch (IOException e) { logger.error("Error in bad query persistence.", e); } } } private class Entry implements Comparable<Entry> { final SQLRequest sqlRequest; final long startTime; final Thread thread; final String user; Entry(SQLRequest sqlRequest, String user, Thread thread) { this.sqlRequest = sqlRequest; this.startTime = System.currentTimeMillis(); this.thread = thread; this.user = user; } @Override public int compareTo(Entry o) { return (int) (this.startTime - o.startTime); } } }