// Copyright 2016 Twitter. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.twitter.heron.common.utils.metrics; import java.lang.management.BufferPoolMXBean; import java.lang.management.GarbageCollectorMXBean; import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; import java.lang.management.MemoryPoolMXBean; import java.lang.management.MemoryUsage; import java.lang.management.OperatingSystemMXBean; import java.lang.management.RuntimeMXBean; import java.lang.management.ThreadInfo; import java.lang.management.ThreadMXBean; import java.time.Duration; import java.util.List; import com.twitter.heron.api.metric.AssignableMetric; import com.twitter.heron.api.metric.MeanReducer; import com.twitter.heron.api.metric.MeanReducerState; import com.twitter.heron.api.metric.MultiAssignableMetric; import com.twitter.heron.api.metric.ReducedMetric; import com.twitter.heron.common.basics.ByteAmount; import com.twitter.heron.common.basics.SingletonRegistry; import com.twitter.heron.common.config.SystemConfig; import com.twitter.heron.common.utils.misc.ThreadNames; /** * JVM metrics to be collected */ public class JVMMetrics { private final Runtime runtime = Runtime.getRuntime(); private final MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean(); private final RuntimeMXBean runtimeMXBean = ManagementFactory.getRuntimeMXBean(); private final OperatingSystemMXBean osMbean = ManagementFactory.getOperatingSystemMXBean(); private final ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean(); private final List<MemoryPoolMXBean> memoryPoolMXBeanList = ManagementFactory.getMemoryPoolMXBeans(); private final List<BufferPoolMXBean> bufferPoolMXBeanList = ManagementFactory.getPlatformMXBeans(BufferPoolMXBean.class); // Metric for time spent in GC per generational collection, and the sum total of all collections. private final MultiAssignableMetric<Long> jvmGCTimeMsPerGCType; // Metrics for count of GC per generational collection, and the sum total of all collections. private final MultiAssignableMetric<Long> jvmGCCountPerGCType; // Metric for total live jvm threads private final AssignableMetric<Integer> jvmThreadCount; // Metric for total live jvm daemon threads private final AssignableMetric<Integer> jvmDaemonThreadCount; // Metric for number of open file descriptors private final AssignableMetric<Long> fdCount; // Metric for max file descriptors allowed per jvm process private final AssignableMetric<Long> fdLimit; // The accumulated time spending on Garbage Collection in MilliSeconds private AssignableMetric<Long> jvmGCTimeMs; // The accumulated account of JVM Garbage Collection private AssignableMetric<Long> jvmGCCount; // The JVM up times private AssignableMetric<Long> jvmUpTimeSecs; /* * Returns the CPU time used by the process on which the Java virtual machine is running in nanoseconds. * The value is of nanoseconds precision but not necessarily nanoseconds accuracy. */ private AssignableMetric<Long> processCPUTimeNs; /* * Returns the total CPU time for a thread of the specified ID in nanoseconds. * The returned value is of nanoseconds precision but not necessarily nanoseconds accuracy. * If the implementation distinguishes between user mode time and system mode time, * the returned CPU time is the amount of time that the thread has executed in user mode * or system mode. * If the thread of the specified ID is not alive or does not exist, * this method returns -1. If CPU time measurement is disabled, this method returns -1. * A thread is alive if it has been started and has not yet died. * <p/> * If CPU time measurement is enabled after the thread has started, * the Java virtual machine implementation may choose any time up to and including the * time that the capability is enabled as the point where CPU time measurement starts. */ private MultiAssignableMetric<Long> threadsCPUTimeNs; // The cpu time used by threads other than SlaveThread and GatewayThread private AssignableMetric<Long> otherThreadsCPUTimeNs; /* * Returns the CPU time that a thread of the specified ID has executed in user mode in nanosecs. * The returned value is of nanoseconds precision but not necessarily nanoseconds accuracy. * If the thread of the specified ID is not alive or does not exist, this method returns -1. * If CPU time measurement is disabled, this method returns -1. * A thread is alive if it has been started and has not yet died. * <p/> * If CPU time measurement is enabled after the thread has started, * the Java virtual machine implementation may choose any time up to and including the * time that the capability is enabled as the point where CPU time measurement starts. */ private MultiAssignableMetric<Long> threadsUserCPUTimeNs; // The user cpu time used by threads other than SlaveThread and GatewayThread private AssignableMetric<Long> otherThreadsUserCPUTimeNs; /* * The "recent cpu usage" for the Java Virtual Machine process. * This value is a double in the [0.0,1.0] interval. * A value of 0.0 means that none of the CPUs were running threads from the JVM process * during the recent period of time observed, * while a value of 1.0 means that all CPUs were actively running threads from the JVM * 100% of the time during the recent period being observed. * Threads from the JVM include the application threads as well as the JVM internal threads. * All values betweens 0.0 and 1.0 are possible depending of the activities going on in * the JVM process and the whole system. If the Java Virtual Machine recent CPU usage is * not available, the method returns a negative value. */ private ReducedMetric<MeanReducerState, Number, Double> processCPULoad; // Metrics that measure memory, memory's heap and memory's non-heap private ReducedMetric<MeanReducerState, Number, Double> jvmMemoryFreeMB; private ReducedMetric<MeanReducerState, Number, Double> jvmMemoryUsedMB; private ReducedMetric<MeanReducerState, Number, Double> jvmMemoryTotalMB; private ReducedMetric<MeanReducerState, Number, Double> jvmMemoryHeapUsedMB; private ReducedMetric<MeanReducerState, Number, Double> jvmMemoryHeapCommittedMB; private ReducedMetric<MeanReducerState, Number, Double> jvmMemoryHeapMaxMB; private ReducedMetric<MeanReducerState, Number, Double> jvmMemoryNonHeapUsedMB; private ReducedMetric<MeanReducerState, Number, Double> jvmMemoryNonHeapCommittedMB; private ReducedMetric<MeanReducerState, Number, Double> jvmMemoryNonHeapMaxMB; // Gather metrics for different memory pools in heap, for instance: // Par Eden Space, Par Survivor Space, CMS Old Gen, CMS Perm Gen // The peak memory usage of a memory pool since the Java virtual machine was started // or since the peak was reset. private MultiAssignableMetric<Long> jvmPeakUsagePerMemoryPool; // The memory usage after the Java virtual machine most recently expended effort in recycling // unused objects in a memory pool. private MultiAssignableMetric<Long> jvmCollectionUsagePerMemoryPool; // An estimate of the memory usage of a memory pool. private MultiAssignableMetric<Long> jvmEstimatedUsagePerMemoryPool; /* * Metrics for mapped and direct buffer pool usage. */ private MultiAssignableMetric<Long> jvmBufferPoolMemoryUsage; public JVMMetrics() { jvmGCTimeMs = new AssignableMetric<>(0L); jvmGCCount = new AssignableMetric<>(0L); jvmGCCountPerGCType = new MultiAssignableMetric<>(0L); jvmGCTimeMsPerGCType = new MultiAssignableMetric<>(0L); jvmUpTimeSecs = new AssignableMetric<>(0L); jvmThreadCount = new AssignableMetric<>(0); jvmDaemonThreadCount = new AssignableMetric<>(0); processCPUTimeNs = new AssignableMetric<>(0L); threadsCPUTimeNs = new MultiAssignableMetric<>(0L); otherThreadsCPUTimeNs = new AssignableMetric<>(0L); threadsUserCPUTimeNs = new MultiAssignableMetric<>(0L); otherThreadsUserCPUTimeNs = new AssignableMetric<>(0L); processCPULoad = new ReducedMetric<>(new MeanReducer()); fdCount = new AssignableMetric<>(0L); fdLimit = new AssignableMetric<>(0L); jvmMemoryFreeMB = new ReducedMetric<>(new MeanReducer()); jvmMemoryUsedMB = new ReducedMetric<>(new MeanReducer()); jvmMemoryTotalMB = new ReducedMetric<>(new MeanReducer()); jvmMemoryHeapUsedMB = new ReducedMetric<>(new MeanReducer()); jvmMemoryHeapCommittedMB = new ReducedMetric<>(new MeanReducer()); jvmMemoryHeapMaxMB = new ReducedMetric<>(new MeanReducer()); jvmMemoryNonHeapUsedMB = new ReducedMetric<>(new MeanReducer()); jvmMemoryNonHeapCommittedMB = new ReducedMetric<>(new MeanReducer()); jvmMemoryNonHeapMaxMB = new ReducedMetric<>(new MeanReducer()); jvmPeakUsagePerMemoryPool = new MultiAssignableMetric<>(0L); jvmCollectionUsagePerMemoryPool = new MultiAssignableMetric<>(0L); jvmEstimatedUsagePerMemoryPool = new MultiAssignableMetric<>(0L); jvmBufferPoolMemoryUsage = new MultiAssignableMetric<>(0L); } /** * Register metrics with the metrics collector */ public void registerMetrics(MetricsCollector metricsCollector) { SystemConfig systemConfig = (SystemConfig) SingletonRegistry.INSTANCE.getSingleton( SystemConfig.HERON_SYSTEM_CONFIG); int interval = (int) systemConfig.getHeronMetricsExportInterval().getSeconds(); metricsCollector.registerMetric("__jvm-gc-collection-time-ms", jvmGCTimeMs, interval); metricsCollector.registerMetric("__jvm-gc-collection-count", jvmGCCount, interval); metricsCollector.registerMetric("__jvm-gc-time-ms", jvmGCTimeMsPerGCType, interval); metricsCollector.registerMetric("__jvm-gc-count", jvmGCCountPerGCType, interval); metricsCollector.registerMetric("__jvm-uptime-secs", jvmUpTimeSecs, interval); metricsCollector.registerMetric("__jvm-thread-count", jvmThreadCount, interval); metricsCollector.registerMetric("__jvm-daemon-thread-count", jvmDaemonThreadCount, interval); metricsCollector.registerMetric("__jvm-process-cpu-time-nanos", processCPUTimeNs, interval); metricsCollector.registerMetric("__jvm-threads-cpu-time-nanos", threadsCPUTimeNs, interval); metricsCollector.registerMetric( "__jvm-other-threads-cpu-time-nanos", otherThreadsCPUTimeNs, interval); metricsCollector.registerMetric( "__jvm-threads-user-cpu-time-nanos", threadsUserCPUTimeNs, interval); metricsCollector.registerMetric( "__jvm-other-threads-user-cpu-time-nanos", otherThreadsUserCPUTimeNs, interval); metricsCollector.registerMetric("__jvm-process-cpu-load", processCPULoad, interval); metricsCollector.registerMetric("__jvm-fd-count", fdCount, interval); metricsCollector.registerMetric("__jvm-fd-limit", fdLimit, interval); metricsCollector.registerMetric("__jvm-memory-free-mb", jvmMemoryFreeMB, interval); metricsCollector.registerMetric("__jvm-memory-used-mb", jvmMemoryUsedMB, interval); metricsCollector.registerMetric("__jvm-memory-mb-total", jvmMemoryTotalMB, interval); metricsCollector.registerMetric("__jvm-memory-heap-mb-used", jvmMemoryHeapUsedMB, interval); metricsCollector.registerMetric( "__jvm-memory-heap-mb-committed", jvmMemoryHeapCommittedMB, interval); metricsCollector.registerMetric("__jvm-memory-heap-mb-max", jvmMemoryHeapMaxMB, interval); metricsCollector.registerMetric( "__jvm-memory-non-heap-mb-used", jvmMemoryNonHeapUsedMB, interval); metricsCollector.registerMetric( "__jvm-memory-non-heap-mb-committed", jvmMemoryNonHeapCommittedMB, interval); metricsCollector.registerMetric( "__jvm-memory-non-heap-mb-max", jvmMemoryNonHeapMaxMB, interval); metricsCollector.registerMetric( "__jvm-peak-usage", jvmPeakUsagePerMemoryPool, interval); metricsCollector.registerMetric( "__jvm-collection-usage", jvmCollectionUsagePerMemoryPool, interval); metricsCollector.registerMetric( "__jvm-estimated-usage", jvmEstimatedUsagePerMemoryPool, interval); metricsCollector.registerMetric("__jvm-buffer-pool", jvmBufferPoolMemoryUsage, interval); } public Runnable getJVMSampleRunnable() { final Runnable sampleRunnable = new Runnable() { @Override public void run() { updateGcMetrics(); jvmUpTimeSecs.setValue(Duration.ofMillis(runtimeMXBean.getUptime()).getSeconds()); processCPUTimeNs.setValue(getProcessCPUTimeNs()); getThreadsMetrics(); // We multiple # of processors to measure a process cpu load based on cores rather than // overall machine processCPULoad.update(getProcessCPULoad() * runtime.availableProcessors()); updateFdMetrics(); updateMemoryPoolMetrics(); updateBufferPoolMetrics(); ByteAmount freeMemory = ByteAmount.fromBytes(runtime.freeMemory()); ByteAmount totalMemory = ByteAmount.fromBytes(runtime.totalMemory()); jvmMemoryFreeMB.update(freeMemory.asMegabytes()); jvmMemoryTotalMB.update(totalMemory.asMegabytes()); jvmMemoryUsedMB.update(totalMemory.asMegabytes() - freeMemory.asMegabytes()); jvmMemoryHeapUsedMB.update( ByteAmount.fromBytes(memoryBean.getHeapMemoryUsage().getUsed()).asMegabytes()); jvmMemoryHeapCommittedMB.update( ByteAmount.fromBytes(memoryBean.getHeapMemoryUsage().getCommitted()).asMegabytes()); jvmMemoryHeapMaxMB.update( ByteAmount.fromBytes(memoryBean.getHeapMemoryUsage().getMax()).asMegabytes()); jvmMemoryNonHeapUsedMB.update( ByteAmount.fromBytes(memoryBean.getNonHeapMemoryUsage().getUsed()).asMegabytes()); jvmMemoryNonHeapCommittedMB.update( ByteAmount.fromBytes(memoryBean.getNonHeapMemoryUsage().getCommitted()).asMegabytes()); jvmMemoryNonHeapMaxMB.update( ByteAmount.fromBytes(memoryBean.getNonHeapMemoryUsage().getMax()).asMegabytes()); } }; return sampleRunnable; } // Gather metrics related to both direct and mapped byte buffers in the jvm. // These metrics can be useful for diagnosing native memory usage. private void updateBufferPoolMetrics() { for (BufferPoolMXBean bufferPoolMXBean : bufferPoolMXBeanList) { String normalizedKeyName = bufferPoolMXBean.getName().replaceAll("[^\\w]", "-"); final ByteAmount memoryUsed = ByteAmount.fromBytes(bufferPoolMXBean.getMemoryUsed()); final ByteAmount totalCapacity = ByteAmount.fromBytes(bufferPoolMXBean.getTotalCapacity()); final ByteAmount count = ByteAmount.fromBytes(bufferPoolMXBean.getCount()); // The estimated memory the JVM is using for this buffer pool jvmBufferPoolMemoryUsage.safeScope(normalizedKeyName + "-memory-used") .setValue(memoryUsed.asMegabytes()); // The estimated total capacity of the buffers in this pool jvmBufferPoolMemoryUsage.safeScope(normalizedKeyName + "-total-capacity") .setValue(totalCapacity.asMegabytes()); // THe estimated number of buffers in this pool jvmBufferPoolMemoryUsage.safeScope(normalizedKeyName + "-count") .setValue(count.asMegabytes()); } } // Gather metrics for different memory pools in heap, for instance: // Par Eden Space, Par Survivor Space, CMS Old Gen, CMS Perm Gen private void updateMemoryPoolMetrics() { for (MemoryPoolMXBean memoryPoolMXBean : memoryPoolMXBeanList) { String normalizedKeyName = memoryPoolMXBean.getName().replaceAll("[^\\w]", "-"); MemoryUsage peakUsage = memoryPoolMXBean.getPeakUsage(); if (peakUsage != null) { jvmPeakUsagePerMemoryPool.safeScope(normalizedKeyName + "-used") .setValue(ByteAmount.fromBytes(peakUsage.getUsed()).asMegabytes()); jvmPeakUsagePerMemoryPool.safeScope(normalizedKeyName + "-committed") .setValue(ByteAmount.fromBytes(peakUsage.getCommitted()).asMegabytes()); jvmPeakUsagePerMemoryPool.safeScope(normalizedKeyName + "-max") .setValue(ByteAmount.fromBytes(peakUsage.getMax()).asMegabytes()); } MemoryUsage collectionUsage = memoryPoolMXBean.getCollectionUsage(); if (collectionUsage != null) { jvmCollectionUsagePerMemoryPool.safeScope(normalizedKeyName + "-used") .setValue(ByteAmount.fromBytes(collectionUsage.getUsed()).asMegabytes()); jvmCollectionUsagePerMemoryPool.safeScope(normalizedKeyName + "-committed") .setValue(ByteAmount.fromBytes(collectionUsage.getCommitted()).asMegabytes()); jvmCollectionUsagePerMemoryPool.safeScope(normalizedKeyName + "-max") .setValue(ByteAmount.fromBytes(collectionUsage.getMax()).asMegabytes()); } MemoryUsage estimatedUsage = memoryPoolMXBean.getUsage(); if (estimatedUsage != null) { jvmEstimatedUsagePerMemoryPool.safeScope(normalizedKeyName + "-used") .setValue(ByteAmount.fromBytes(estimatedUsage.getUsed()).asMegabytes()); jvmEstimatedUsagePerMemoryPool.safeScope(normalizedKeyName + "-committed") .setValue(ByteAmount.fromBytes(estimatedUsage.getCommitted()).asMegabytes()); jvmEstimatedUsagePerMemoryPool.safeScope(normalizedKeyName + "-max") .setValue(ByteAmount.fromBytes(estimatedUsage.getMax()).asMegabytes()); } } } private void getThreadsMetrics() { // Set the cpu usage for every single thread if (threadMXBean.isThreadCpuTimeSupported()) { threadMXBean.setThreadCpuTimeEnabled(true); long tmpOtherThreadsCpuTime = 0; long tmpOtherThreadsUserCpuTime = 0; for (long id : threadMXBean.getAllThreadIds()) { long cpuTime = threadMXBean.getThreadCpuTime(id); long cpuUserTime = threadMXBean.getThreadUserTime(id); ThreadInfo threadInfo = threadMXBean.getThreadInfo(id); if (threadInfo != null) { String threadName = threadInfo.getThreadName(); if (threadName.equals(ThreadNames.THREAD_GATEWAY_NAME) || threadName.equals(ThreadNames.THREAD_SLAVE_NAME)) { threadsCPUTimeNs.scope(threadName).setValue(cpuTime); threadsUserCPUTimeNs.scope(threadName).setValue(cpuUserTime); } else { tmpOtherThreadsCpuTime += cpuTime; tmpOtherThreadsUserCpuTime += cpuUserTime; } } } otherThreadsCPUTimeNs.setValue(tmpOtherThreadsCpuTime); otherThreadsUserCPUTimeNs.setValue(tmpOtherThreadsUserCpuTime); jvmThreadCount.setValue(threadMXBean.getThreadCount()); jvmDaemonThreadCount.setValue(threadMXBean.getDaemonThreadCount()); } } //All gc related metrics should be updated here private void updateGcMetrics() { updateGcTimes(); updateGcCounts(); } private void updateGcTimes() { long totalTimeMs = 0; for (GarbageCollectorMXBean bean : ManagementFactory.getGarbageCollectorMXBeans()) { long collectionTimeMs = bean.getCollectionTime(); totalTimeMs += collectionTimeMs; // Replace all non alpha-numeric characters to '-' String normalizedKeyName = bean.getName().replaceAll("[^\\w]", "-"); jvmGCTimeMsPerGCType.safeScope(normalizedKeyName).setValue(collectionTimeMs); } jvmGCTimeMs.setValue(totalTimeMs); } private void updateGcCounts() { long totalCount = 0; for (GarbageCollectorMXBean bean : ManagementFactory.getGarbageCollectorMXBeans()) { long collectionCount = bean.getCollectionCount(); totalCount += collectionCount; // Replace all non alpha-numeric characters to '-' String normalizedKeyName = bean.getName().replaceAll("[^\\w]", "-"); jvmGCCountPerGCType.safeScope(normalizedKeyName).setValue(collectionCount); } jvmGCCount.setValue(totalCount); } private long getProcessCPUTimeNs() { if (osMbean instanceof com.sun.management.OperatingSystemMXBean) { final com.sun.management.OperatingSystemMXBean sunOsMbean = (com.sun.management.OperatingSystemMXBean) osMbean; return sunOsMbean.getProcessCpuTime(); } return -1; } private double getProcessCPULoad() { if (osMbean instanceof com.sun.management.OperatingSystemMXBean) { final com.sun.management.OperatingSystemMXBean sunOsMbean = (com.sun.management.OperatingSystemMXBean) osMbean; return sunOsMbean.getProcessCpuLoad(); } return -1; } //Update file descriptor metrics private void updateFdMetrics() { if (osMbean instanceof com.sun.management.UnixOperatingSystemMXBean) { final com.sun.management.UnixOperatingSystemMXBean unix = (com.sun.management.UnixOperatingSystemMXBean) osMbean; fdCount.setValue(unix.getOpenFileDescriptorCount()); fdLimit.setValue(unix.getMaxFileDescriptorCount()); } } }