/*
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.max.vm.profilers.sampling;
import java.util.*;
import com.sun.max.annotate.*;
import com.sun.max.vm.*;
import com.sun.max.vm.actor.member.*;
import com.sun.max.vm.runtime.*;
import com.sun.max.vm.stack.*;
import com.sun.max.vm.thread.VmThread;
import com.sun.max.unsafe.*;
/**
* Sampling profiler. Runs a thread that periodically wakes up, stops all the threads, and records their stack.
* There is a singleton instance per VM and most state is static/global. Note that the stack is gathered regardless
* of the state of the thread, e.g., it may be blocked.
*
* Attempts to allocate minimal heap memory to limit interference with the application.
* The strategy is based on the assumption that the same stack traces will occur frequently.
* The basic data structure is a map from {@link StackInfo} to a list of {@link ThreadSample}
* instances which hold the sample count for each thread.
*
* A singleton global instance, {@link #workingStackInfo}, of {@link StackInfo} is used to gather the stack for a thread,
* and an exact-length copy is entered into the map when a new stack is discovered.
*
* Note that due to the way the {@link VmOperation} works, all threads will be stopped in a
* native method at the time the stack is gathered. Some threads may indeed be blocked
* on a native method called by the thread. Others will be in the monitor code after
* taking the trap that starts the thread stopping machinery. These stack frames should not
* be presented to the user. This is handled by {@link SamplingStackTraceVisitor#clear()}.
* Unfortunately this does mean that the stack depth control can't be honored trivially
* as the stack is being gathered. This is optimized with {@link #workingStackClearSeen}
*
* The {@link #terminate} method outputs the data at VM shutdown, but it can also be dumped
* periodically. Data is output using the {@link Log} class. By default output is sorted by thread and by sample count
* This has more allocation overhead at the time of output and so is the default only if data is output at
* VM termination. In unsorted mode the stack traces and samples counts are output in an arbitrary order.
*/
public final class SamplingProfiler extends Thread {
private static final int DEFAULT_FREQUENCY = 10;
/**
* This is a conservative empirically derived number that includes the {@link VmOperation}
* frames for a stopped thread.
*/
private static final int MINIMUM_DEPTH = 16;
/**
* The default depth is quite large mostly because of the depth of the current monitor lock acquisition call stack.
*/
private static final int DEFAULT_DEPTH = 4;
private static final Random rand = new Random();
/**
* The base period in milliseconds between activations of the profiler.
*/
@CONSTANT_WHEN_NOT_ZERO
private static int sampleFrequency;
/**
* To mitigate strobe effects the profiler activations are randomized around {@link #sampleFrequency}.
* The actual activation period is in the range {@code sampleFrequency - jiggle <-> sampleFrequency + jiggle}.
*/
@CONSTANT_WHEN_NOT_ZERO
private static int jiggle;
/**
* The maximum stack depth the profiler will ever gather.
* N.B. This applies to "user" frames, not the raw frames that include those caused
* by the {@link VmOperation} thread stopping mechanism.
*/
@CONSTANT_WHEN_NOT_ZERO
private static int maxStackDepth;
/**
* Used as a scratch object for the working stack being analyzed, to avoid excessive heap allocation.
* Stacks are analyzed serially and the (working) stack being analyzed is built up in this object,
* which is reset prior to the analysis. The map lookup uses this object and only if the stack has not
* been seen before is a new {@link StackInfo} object allocated and the contents copied in.
* Therefore, once an application reaches a steady-state, allocations should be minimal.
*
*/
private static StackInfo workingStackInfo;
/**
* Records the depth for the working stack being analyzed.
*/
private static int workingStackDepth;
/**
* This value is set {@code false} before any stack is gathered and set {@code true}
* if and when {@link SamplingStackTraceVisitor#clear} is called.
*/
private static boolean workingStackClearSeen;
/**
* Allows profiling to be turned off temporarily.
*/
private static volatile boolean isProfiling;
/**
* Number of samples collected.
*/
private static int sampleCount;
/**
* Period in milliseconds between dumping the traces to the log.
* Zero implies only dump on VM termination.
*/
private static long dumpInterval;
/**
* The profiler thread itself.
*/
private static VmThread theProfiler;
/**
* {@code true} if an only if we are generating sorted output.
*/
private static boolean sortedOutput;
/**
* Produces "flat" output like Hotspot. This implies {@link #sortedOutput} {@code = true}
* and {@link #maxStackDepth} {@code = 1}.
*/
private static boolean flat;
/**
* {@code true} if and only if we are tracking (system) VM threads.
*/
private static boolean trackSystemThreads;
/**
* A debugging aid; logs the time at which the sampling thread woke up.
*/
private static boolean logSampleTimes;
/**
* For each unique stack trace, we record the list of threads with that trace and their sample count.
*/
private static Map<StackInfo, List<ThreadSample>> stackInfoMap = new HashMap<StackInfo, List<ThreadSample>>();
private SamplingProfiler() {
super(VmThread.systemThreadGroup, "SamplingProfiler");
setDaemon(true);
}
/**
* Create the profiler with the options given by {@code optionValue}.
* @param optionValue a string of the form {@code :frequency=f,stackdepth=d,dump=t} where any element may be omitted.
*/
public static void create(String optionValue) {
int frequency = 0;
int stackDepth = 0;
int dumpPeriod = 0;
boolean sortedOutputOptionSet = false;
boolean flatOptionSet = false;
if (optionValue.length() > 0) {
if (optionValue.charAt(0) == ':') {
String[] options = optionValue.substring(1).split(",");
for (String option : options) {
if (option.startsWith("frequency")) {
frequency = getOption(option);
} else if (option.startsWith("depth")) {
stackDepth = getOption(option);
if (stackDepth < 0) {
usage();
}
} else if (option.startsWith("dump")) {
dumpPeriod = getOption(option);
} else if (option.startsWith("debug")) {
logSampleTimes = true;
} else if (option.startsWith("systhreads")) {
trackSystemThreads = getBoolOption(option);
} else if (option.equals("sort")) {
sortedOutputOptionSet = getBoolOption(option);
} else if (option.startsWith("flat")) {
flatOptionSet = getBoolOption(option);
} else {
usage();
}
}
} else {
usage();
}
}
// if sort option is set, honor it, otherwise default based on dump
sortedOutput = sortedOutputOptionSet ? true : dumpPeriod == 0;
flat = flatOptionSet ? true : dumpPeriod == 0;
if (flat) {
sortedOutput = true;
stackDepth = 1;
}
create(frequency, stackDepth, dumpPeriod);
}
private static void usage() {
System.err.println("usage: -Xprof:frequency=f,depth=d,systhreads,dump=t,sort[=t],flat[=t]");
MaxineVM.native_exit(1);
}
private static boolean getBoolOption(String s) {
final int index = s.indexOf('=');
if (index < 0) {
return true;
}
return Boolean.parseBoolean(s.substring(index + 1));
}
private static int getOption(String s) {
final int index = s.indexOf('=');
if (index < 0) {
usage();
}
return Integer.parseInt(s.substring(index + 1));
}
/**
* Create a sample-based profiler with given measurement frequency, stack depth and dump period.
* @param frequency base period for measurements in millisecs, 0 implies {@value #DEFAULT_FREQUENCY}
* @param depth stack depth to record, 0 implies {@value #DEFAULT_DEPTH}
* @param dumpPeriod time in seconds between dumps to log, 0 implies only at termination (default)
*/
private static void create(int frequency, int depth, int dumpPeriod) {
sampleFrequency = frequency == 0 ? DEFAULT_FREQUENCY : frequency;
jiggle = sampleFrequency / 10;
if (jiggle <= 2) {
jiggle = 2;
}
maxStackDepth = depth == 0 ? DEFAULT_DEPTH : depth;
dumpInterval = dumpPeriod * 1000000000L;
workingStackInfo = new StackInfo(Math.max(MINIMUM_DEPTH, maxStackDepth));
final Thread profileThread = new SamplingProfiler();
isProfiling = true;
profileThread.start();
}
@Override
public void run() {
theProfiler = VmThread.fromJava(this);
long lastDump = System.nanoTime();
while (true) {
try {
final int thisJiggle = rand.nextInt(jiggle);
final int thisPeriod = sampleFrequency + (rand.nextBoolean() ? thisJiggle : -thisJiggle);
Thread.sleep(thisPeriod);
final long now = System.nanoTime();
if (isProfiling) {
if (logSampleTimes) {
boolean state = Log.lock();
Log.print("SamplingProfiler running at ");
Log.println(now);
Log.unlock(state);
}
stackTraceGatherer.submit();
sampleCount++;
if (dumpInterval > 0 && now > lastDump + dumpInterval) {
dumpTraces();
lastDump = now;
}
}
} catch (InterruptedException ex) {
}
}
}
/**
* Encapsulates the basic logic of handling one thread after all threads are frozen at a safepoint.
*/
private static final class StackTraceGatherer extends VmOperation {
StackTraceGatherer() {
super("SamplingProfiler", null, Mode.Safepoint);
}
@Override
protected boolean operateOnThread(VmThread thread) {
final boolean ignore = thread == theProfiler ||
(isSystemThread(thread) && !trackSystemThreads);
return !ignore;
}
@Override
public void doThread(VmThread vmThread, Pointer ip, Pointer sp, Pointer fp) {
SamplingStackTraceVisitor stv = new SamplingStackTraceVisitor();
final VmStackFrameWalker stackFrameWalker = vmThread.samplingProfilerStackFrameWalker();
workingStackInfo.reset(0);
workingStackDepth = 0;
workingStackClearSeen = false;
stv.walk(stackFrameWalker, ip, sp, fp);
if (!workingStackClearSeen) {
// we may have gathered > maxStackDepth frames; fix that here before we do the lookup
if (workingStackDepth > maxStackDepth) {
workingStackInfo.reset(maxStackDepth);
}
}
// Have we seen this stack before?
List<ThreadSample> threadSampleList = stackInfoMap.get(workingStackInfo);
if (threadSampleList == null) {
threadSampleList = new ArrayList<ThreadSample>();
final StackInfo copy = workingStackInfo.copy(maxStackDepth);
List<ThreadSample> existing = stackInfoMap.put(copy, threadSampleList);
assert existing == null;
}
// Check if this thread has had this stack trace before, allocating a new ThreadSample instance if not
final ThreadSample threadSample = getThreadSample(threadSampleList, vmThread);
// bump the number of times the given thread has been in this state
threadSample.count++;
}
}
private static boolean isSystemThread(VmThread vmThread) {
return vmThread.javaThread().getThreadGroup() == VmThread.systemThreadGroup;
}
private static final StackTraceGatherer stackTraceGatherer = new StackTraceGatherer();
/**
* Allocation free stack frame analyzer that builds up the stack info in {@link SamplingProfiler#workingStackInfo}.
*/
public static class SamplingStackTraceVisitor extends StackTraceVisitor {
SamplingStackTraceVisitor() {
super(null);
}
@Override
public boolean add(ClassMethodActor classMethodActor, int sourceLineNumber) {
assert classMethodActor != null;
workingStackInfo.stack[workingStackDepth].classMethodActor = classMethodActor;
workingStackInfo.stack[workingStackDepth].lineNumber = sourceLineNumber;
workingStackDepth++;
return workingStackClearSeen ? workingStackDepth < maxStackDepth : workingStackDepth < workingStackInfo.stack.length;
}
@Override
public void clear() {
workingStackInfo.reset(0);
workingStackDepth = 0;
workingStackClearSeen = true;
}
@Override
public StackTraceElement[] getTrace() {
return null;
}
}
private static ThreadSample getThreadSample(List<ThreadSample> threadSampleList, VmThread vmThread) {
for (ThreadSample threadSample : threadSampleList) {
if (threadSample.vmThread == vmThread) {
return threadSample;
}
}
final ThreadSample threadSample = new ThreadSample(vmThread);
threadSampleList.add(threadSample);
return threadSample;
}
/**
* Value class that records a thread and a sample count.
*/
public static class ThreadSample {
VmThread vmThread;
private long count;
ThreadSample(VmThread vmThread) {
this.vmThread = vmThread;
}
static void print(VmThread vmThread) {
final Thread t = vmThread.javaThread();
Log.print("Thread id=");
Log.print(t.getId());
Log.print(", name=\"");
Log.print(t.getName());
Log.print("\"");
}
void print() {
print(vmThread);
Log.print(" count: ");
Log.print(count);
}
}
/**
* Value class that captures the essential information on a stack frame element.
*/
private static class StackElement {
ClassMethodActor classMethodActor;
int lineNumber; // < 0 if unknown
void print() {
printName();
Log.print('(');
if (classMethodActor.nativeFunction == null) {
Log.print(classMethodActor.holder().sourceFileName);
if (lineNumber > 0) {
Log.print(':');
Log.print(lineNumber);
}
} else {
Log.print("Native Method");
}
Log.println(')');
}
void printName() {
Log.print(classMethodActor.holder().name.toString());
Log.print('.');
Log.print(classMethodActor.name().toString());
}
}
/**
* The essential information on a sequence of frames, with support for comparison and hashing.
* The "logical" length of the stack is the number of elements, starting from zero,
* for which classMethodActor != null. Comparison and hashing use the logical length.
*/
public static class StackInfo {
StackElement[] stack;
StackInfo(int depth) {
stack = new StackElement[depth];
for (int i = 0; i < depth; i++) {
stack[i] = new StackElement();
}
}
protected StackInfo(StackElement[] stack) {
this.stack = stack;
}
void reset(int start) {
for (int i = start; i < stack.length; i++) {
stack[i].classMethodActor = null;
stack[i].lineNumber = -1;
}
}
@Override
public int hashCode() {
int result = 0;
for (StackElement s : stack) {
if (s.classMethodActor == null) {
break;
} else {
result ^= s.lineNumber ^ s.classMethodActor.hashCode();
}
}
return result;
}
@Override
public boolean equals(Object o) {
// array lengths may be different, but can still compare equal
final StackInfo osi = (StackInfo) o;
int min = stack.length;
StackInfo longer = null;
if (stack.length < osi.stack.length) {
longer = osi;
} else if (stack.length > osi.stack.length) {
min = osi.stack.length;
longer = this;
}
// compare up to min
for (int i = 0; i < min; i++) {
if ((osi.stack[i].classMethodActor != stack[i].classMethodActor) ||
osi.stack[i].lineNumber != stack[i].lineNumber) {
return false;
}
}
// same length or if longer is empty after min
return (longer == null) || longer.stack[min].classMethodActor == null;
}
/**
* Shallow copy of this object, truncating length.
* @return copied object
*/
StackInfo copy(int depth) {
final StackInfo result = new StackInfo(depth);
for (int i = 0; i < result.stack.length; i++) {
ClassMethodActor cm = this.stack[i].classMethodActor;
assert cm != null;
result.stack[i].classMethodActor = cm;
result.stack[i].lineNumber = this.stack[i].lineNumber;
}
return result;
}
}
public static void terminate() {
isProfiling = false;
dumpTraces();
}
private static void dumpTraces() {
Map<VmThread, CountedStackInfo[]> sortedInfo = null;
if (sortedOutput) {
sortedInfo = sortByThread();
}
boolean state = Log.lock();
Log.print("Maxine Sampling Profiler, #samples: ");
Log.println(sampleCount);
Log.println();
if (sortedOutput) {
dumpSortedOutput(sortedInfo);
} else {
for (Map.Entry<StackInfo, List<ThreadSample>> entry : stackInfoMap.entrySet()) {
final StackInfo stackInfo = entry.getKey();
final List<ThreadSample> threadSampleList = entry.getValue();
for (StackElement se : stackInfo.stack) {
se.print();
}
for (ThreadSample ti : threadSampleList) {
Log.print(" ");
ti.print();
Log.println();
}
Log.println();
}
}
Log.unlock(state);
}
/*
* All the code below here is only used for sorted output.
*/
private static void dumpSortedOutput(Map<VmThread, CountedStackInfo[]> sortedInfo) {
for (Map.Entry<VmThread, CountedStackInfo[]> entry : sortedInfo.entrySet()) {
ThreadSample.print(entry.getKey());
long totalSamples = 0;
for (CountedStackInfo countedStackInfo : entry.getValue()) {
totalSamples += countedStackInfo.count;
}
Log.print(", #samples: ");
Log.print(totalSamples);
Log.println();
for (CountedStackInfo countedStackInfo : entry.getValue()) {
// percentage to two decimal places, rounded
long p1000 = (countedStackInfo.count * 100000) / totalSamples;
long last = p1000 % 10;
long p100 = p1000 / 10;
if (last >= 5) {
p100++;
}
if (flat) {
printPercentage(p100);
Log.print(" ");
printCount(countedStackInfo.count);
Log.print(" ");
countedStackInfo.stack[0].printName();
} else {
Log.print("Sample count ");
Log.print(countedStackInfo.count);
Log.print(" (");
printPercentage(p100);
Log.println(")");
for (StackElement se : countedStackInfo.stack) {
se.print();
}
}
Log.println();
}
Log.println();
}
}
private static void printPercentage(long p100) {
long d1 = p100 / 100;
if (d1 < 10) {
Log.print(' ');
}
Log.print(d1);
Log.print('.');
long d2 = p100 % 100;
if (d2 < 10) {
Log.print('0');
}
Log.print(d2);
Log.print("%");
}
private static void printCount(long count) {
long c = count;
int s = 0;
while (c > 0) {
c = c / 10;
s++;
}
for (int i = 0; i < 8 - s; i++) {
Log.print(' ');
}
Log.print(count);
}
/**
* Used when sorted output is generated. It effectively pairs a {@link StackInfo} with a
* sample count for a thread.
*/
private static class CountedStackInfo extends StackInfo implements Comparator<CountedStackInfo> {
private long count;
CountedStackInfo(StackInfo stackInfo, long count) {
super(stackInfo.stack);
this.count = count;
}
@Override
public int compare(CountedStackInfo a, CountedStackInfo b) {
// low count compares higher
if (a.count < b.count) {
return +1;
} else if (a.count > b.count) {
return -1;
} else {
return 0;
}
}
}
private static Map<VmThread, CountedStackInfo[]> sortByThread() {
Map <VmThread, ArrayList<CountedStackInfo>> tempMap = new HashMap<VmThread, ArrayList<CountedStackInfo>>();
for (Map.Entry<StackInfo, List<ThreadSample>> entry : stackInfoMap.entrySet()) {
final StackInfo stackInfo = entry.getKey();
final List<ThreadSample> threadSampleList = entry.getValue();
for (ThreadSample ti : threadSampleList) {
ArrayList<CountedStackInfo> threadCounts = tempMap.get(ti.vmThread);
if (threadCounts == null) {
threadCounts = new ArrayList<CountedStackInfo>();
tempMap.put(ti.vmThread, threadCounts);
}
threadCounts.add(new CountedStackInfo(stackInfo, ti.count));
}
}
Map<VmThread, CountedStackInfo[]> result = new HashMap<VmThread, CountedStackInfo[]>();
for (Map.Entry<VmThread, ArrayList<CountedStackInfo>> entry : tempMap.entrySet()) {
CountedStackInfo[] threadCountsArray = entry.getValue().toArray(new CountedStackInfo[entry.getValue().size()]);
Arrays.sort(threadCountsArray, threadCountsArray[0]);
result.put(entry.getKey(), threadCountsArray);
}
return result;
}
}