/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ambari.server.state.services; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.lang.Thread.UncaughtExceptionHandler; import java.lang.reflect.Type; import java.util.Map; import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.Executor; import java.util.concurrent.ExecutorService; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import org.apache.ambari.server.AmbariService; import org.apache.ambari.server.configuration.Configuration; import org.apache.ambari.server.controller.jmx.JMXMetricHolder; import org.apache.ambari.server.controller.utilities.ScalingThreadPoolExecutor; import org.apache.ambari.server.controller.utilities.StreamProvider; import org.apache.commons.io.IOUtils; import org.codehaus.jackson.map.DeserializationConfig; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.map.ObjectReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.collect.Sets; import com.google.common.util.concurrent.AbstractService; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; import com.google.gson.stream.JsonReader; import com.google.inject.Inject; /** * The {@link MetricsRetrievalService} is used as a headless, autonomous service * which encapsulates: * <ul> * <li>An {@link ExecutorService} for fullfilling remote metric URL requests * <li>A cache for JMX metrics * <li>A cache for REST metrics * </ul> * * Classes can inject an instance of this service in order to gain access to its * caches and request mechanism. * <p/> * Callers must submit a request to the service in order to reach out and pull * in remote metric data. Otherwise, the cache will never be populated. On the * first usage of this service, the cache will always be empty. On every * subsequent request, the data from the prior invocation of * {@link #submitRequest(MetricSourceType, StreamProvider, String)} will be available. * <p/> * Metric data is cached temporarily and is controlled by * {@link Configuration#getMetricsServiceCacheTimeout()}. * <p/> * In order to control throttling requests to the same endpoint, * {@link Configuration#isMetricsServiceRequestTTLCacheEnabled()} can be enabled * to allow for a fixed interval of time to pass between requests. */ @AmbariService public class MetricsRetrievalService extends AbstractService { /** * The type of web service hosting the metrics. */ public enum MetricSourceType { /** * JMX */ JMX, /** * REST */ REST } /** * Logger. */ protected final static Logger LOG = LoggerFactory.getLogger(MetricsRetrievalService.class); /** * The timeout for exceptions which are caught and then cached to prevent log * spamming. * * @see #s_exceptionCache */ private static final int EXCEPTION_CACHE_TIMEOUT_MINUTES = 20; /** * Exceptions from this service should not SPAM the logs; so cache exceptions * and log once every {@vale #EXCEPTION_CACHE_TIMEOUT_MINUTES} minutes. */ private static final Cache<String, Throwable> s_exceptionCache = CacheBuilder.newBuilder().expireAfterWrite( EXCEPTION_CACHE_TIMEOUT_MINUTES, TimeUnit.MINUTES).build(); /** * Configuration. */ @Inject private Configuration m_configuration; /** * Used for reading REST JSON responses. */ @Inject private Gson m_gson; /** * A cache of URL to parsed JMX beans */ private Cache<String, JMXMetricHolder> m_jmxCache; /** * A cache of URL to parsed REST data. */ private Cache<String, Map<String, String>> m_restCache; /** * The {@link Executor} which will handle all of the requests to load remote * metrics from URLs. */ private ThreadPoolExecutor m_threadPoolExecutor; /** * Used to parse remote JMX JSON into a {@link Map}. */ private final ObjectReader m_jmxObjectReader; /** * A thread-safe collection of all of the URL endpoints queued for processing. * This helps prevent the same endpoint from being queued multiple times. */ private final Set<String> m_queuedUrls = Sets.newConcurrentHashSet(); /** * An evicting cache which ensures that multiple requests for the same * endpoint are not executed back-to-back. When enabled, a fixed period of * time must pass before this service will make requests to a previously * retrieved endpoint. * <p/> * If this cache is not enabled, then it will be {@code null}. * <p/> * For simplicity, this is a cache of URL to URL. */ private Cache<String, String> m_ttlUrlCache; /** * The size of the worker queue (used for logged warnings about size). */ private int m_queueMaximumSize; /** * Constructor. * */ public MetricsRetrievalService() { ObjectMapper jmxObjectMapper = new ObjectMapper(); jmxObjectMapper.configure(DeserializationConfig.Feature.USE_ANNOTATIONS, false); m_jmxObjectReader = jmxObjectMapper.reader(JMXMetricHolder.class); } /** * {@inheritDoc} */ @Override protected void doStart() { // initialize the caches int jmxCacheExpirationMinutes = m_configuration.getMetricsServiceCacheTimeout(); m_jmxCache = CacheBuilder.newBuilder().expireAfterWrite(jmxCacheExpirationMinutes, TimeUnit.MINUTES).build(); m_restCache = CacheBuilder.newBuilder().expireAfterWrite(jmxCacheExpirationMinutes, TimeUnit.MINUTES).build(); // enable the TTL cache if configured; otherwise leave it as null int ttlSeconds = m_configuration.getMetricsServiceRequestTTL(); boolean ttlCacheEnabled = m_configuration.isMetricsServiceRequestTTLCacheEnabled(); if (ttlCacheEnabled) { m_ttlUrlCache = CacheBuilder.newBuilder().expireAfterWrite(ttlSeconds, TimeUnit.SECONDS).build(); } // iniitalize the executor service int corePoolSize = m_configuration.getMetricsServiceThreadPoolCoreSize(); int maxPoolSize = m_configuration.getMetricsServiceThreadPoolMaxSize(); m_queueMaximumSize = m_configuration.getMetricsServiceWorkerQueueSize(); int threadPriority = m_configuration.getMetricsServiceThreadPriority(); m_threadPoolExecutor = new ScalingThreadPoolExecutor(corePoolSize, maxPoolSize, 30, TimeUnit.SECONDS, m_queueMaximumSize); m_threadPoolExecutor.allowCoreThreadTimeOut(true); m_threadPoolExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.DiscardOldestPolicy()); ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true).setNameFormat( "ambari-metrics-retrieval-service-thread-%d").setPriority( threadPriority).setUncaughtExceptionHandler( new MetricRunnableExceptionHandler()).build(); m_threadPoolExecutor.setThreadFactory(threadFactory); LOG.info( "Initializing the Metrics Retrieval Service with core={}, max={}, workerQueue={}, threadPriority={}", corePoolSize, maxPoolSize, m_queueMaximumSize, threadPriority); if (ttlCacheEnabled) { LOG.info("Metrics Retrieval Service request TTL cache is enabled and set to {} seconds", ttlSeconds); } } /** * Testing method for setting a synchronous {@link ThreadPoolExecutor}. * * @param threadPoolExecutor */ public void setThreadPoolExecutor(ThreadPoolExecutor threadPoolExecutor) { m_threadPoolExecutor = threadPoolExecutor; } /** * {@inheritDoc} */ @Override protected void doStop() { m_jmxCache.invalidateAll(); m_restCache.invalidateAll(); if (null != m_ttlUrlCache) { m_ttlUrlCache.invalidateAll(); } m_queuedUrls.clear(); m_threadPoolExecutor.shutdownNow(); } /** * Submit a {@link Runnable} for execution which retrieves metric data from * the supplied endpoint. This will run inside of an {@link ExecutorService} * to retrieve metric data from a URL endpoint and parse the result into a * cached value. * <p/> * Once metric data is retrieved it is cached. Data in the cache can be * retrieved via {@link #getCachedJMXMetric(String)} or * {@link #getCachedRESTMetric(String)}, depending on the type of metric * requested. * <p/> * Callers need not worry about invoking this mulitple times for the same URL * endpoint. A single endpoint will only be enqueued once regardless of how * many times this method is called until it has been fully retrieved and * parsed. If the last endpoint request was too recent, then this method will * opt to not make another call until the TTL period expires. * * @param type * the type of service hosting the metric (not {@code null}). * @param streamProvider * the {@link StreamProvider} to use to read from the remote * endpoint. * @param url * the URL to read from * * @see #getCachedJMXMetric(String) */ public void submitRequest(MetricSourceType type, StreamProvider streamProvider, String url) { // check to ensure that the request isn't already queued if (m_queuedUrls.contains(url)) { return; } // check to ensure that the request wasn't made too recently if (null != m_ttlUrlCache && null != m_ttlUrlCache.getIfPresent(url)) { return; } // log warnings if the queue size seems to be rather large BlockingQueue<Runnable> queue = m_threadPoolExecutor.getQueue(); int queueSize = queue.size(); if (queueSize > Math.floor(0.9f * m_queueMaximumSize)) { LOG.warn("The worker queue contains {} work items and is at {}% of capacity", queueSize, ((float) queueSize / m_queueMaximumSize) * 100); } // enqueue this URL m_queuedUrls.add(url); Runnable runnable = null; switch (type) { case JMX: runnable = new JMXRunnable(m_jmxCache, m_queuedUrls, m_ttlUrlCache, m_jmxObjectReader, streamProvider, url); break; case REST: runnable = new RESTRunnable(m_restCache, m_queuedUrls, m_ttlUrlCache, m_gson, streamProvider, url); break; default: LOG.warn("Unable to retrieve metrics for the unknown type {}", type); break; } if (null != runnable) { m_threadPoolExecutor.execute(runnable); } } /** * Gets a cached JMX metric in the form of a {@link JMXMetricHolder}. If there * is no metric data cached for the given URL, then {@code null} is returned. * <p/> * The onky way this cache is populated is by requesting the data to be loaded * asynchronously via * {@link #submitRequest(MetricSourceType, StreamProvider, String)} with the * {@link MetricSourceType#JMX} type. * * @param jmxUrl * the URL to retrieve cached data for (not {@code null}). * @return the metric, or {@code null} if none. */ public JMXMetricHolder getCachedJMXMetric(String jmxUrl) { return m_jmxCache.getIfPresent(jmxUrl); } /** * Gets a cached REST metric in the form of a {@link Map}. If there is no * metric data cached for the given URL, then {@code null} is returned. * <p/> * The onky way this cache is populated is by requesting the data to be loaded * asynchronously via * {@link #submitRequest(MetricSourceType, StreamProvider, String)} with the * {@link MetricSourceType#REST} type. * * @param restUrl * the URL to retrieve cached data for (not {@code null}). * @return the metric, or {@code null} if none. */ public Map<String, String> getCachedRESTMetric(String restUrl) { return m_restCache.getIfPresent(restUrl); } /** * Encapsulates the common logic for all metric {@link Runnable} instnaces. */ private static abstract class MetricRunnable implements Runnable { /** * An initialized stream provider to read the remote endpoint. */ protected final StreamProvider m_streamProvider; /** * A fully-qualified URL to read from. */ protected final String m_url; /** * The URLs which have been requested but not yet read. */ private final Set<String> m_queuedUrls; /** * An evicting cache used to control whether a request for a metric can be * made or if it is too soon after the last request. */ private final Cache<String, String> m_ttlUrlCache; /** * Constructor. * * @param streamProvider * the stream provider to read the URL with * @param url * the URL endpoint to read data from (JMX or REST) * @param queuedUrls * the URLs which are currently waiting to be processed. This * method will remove the specified URL from this {@link Set} when * it completes (successful or not). * @param ttlUrlCache * an evicting cache which is used to determine if a request for a * metric is too soon after the last request, or {@code null} if * requests can be made sequentially without any separation. */ private MetricRunnable(StreamProvider streamProvider, String url, Set<String> queuedUrls, Cache<String, String> ttlUrlCache) { m_streamProvider = streamProvider; m_url = url; m_queuedUrls = queuedUrls; m_ttlUrlCache = ttlUrlCache; } /** * {@inheritDoc} */ @Override public final void run() { // provide some profiling long startTime = 0; long endTime = 0; boolean isDebugEnabled = LOG.isDebugEnabled(); if (isDebugEnabled) { startTime = System.currentTimeMillis(); } InputStream inputStream = null; try { if (isDebugEnabled) { endTime = System.currentTimeMillis(); LOG.debug("Loading metric JSON from {} took {}ms", m_url, (endTime - startTime)); } // read the stream and process it inputStream = m_streamProvider.readFrom(m_url); processInputStreamAndCacheResult(inputStream); // cache the URL, but only after successful parsing of the response if (null != m_ttlUrlCache) { m_ttlUrlCache.put(m_url, m_url); } } catch (IOException exception) { LOG.debug("Removing cached values for url {}", m_url); // need to ensure old values are removed because they could be not valid if the state have changed. removeCachedMetricsForCurrentURL(); logException(exception, m_url); } catch (Exception exception) { logException(exception, m_url); } finally { IOUtils.closeQuietly(inputStream); // remove this URL from the list of queued URLs to ensure it will be // requested again m_queuedUrls.remove(m_url); } } /** * Removes metric values for current URL from cache. */ protected abstract void removeCachedMetricsForCurrentURL(); /** * Reads data from the specified {@link InputStream} and processes that into * a cachable value. The value will then be cached by this method. * * @param inputStream * @throws Exception */ protected abstract void processInputStreamAndCacheResult(InputStream inputStream) throws Exception; /** * Logs the exception for the URL exactly once and caches the fact that the * exception was logged. This is to prevent an endpoint being down from * spamming the logs. * * @param throwable * the exception to log (not {@code null}). * @param url * the URL associated with the exception (not {@code null}). */ final void logException(Throwable throwable, String url) { String cacheKey = buildCacheKey(throwable, url); if (null == s_exceptionCache.getIfPresent(cacheKey)) { // cache it and log it s_exceptionCache.put(cacheKey, throwable); LOG.error( "Unable to retrieve metrics from {}. Subsequent failures will be suppressed from the log for {} minutes.", url, EXCEPTION_CACHE_TIMEOUT_MINUTES, throwable); } } /** * Builds a unique cache key for the combination of {@link Throwable} and * {@link String} URL. * * @param throwable * @param url * @return the key, such as <code>IOException-http://www.server.com/jmx</code>. */ private String buildCacheKey(Throwable throwable, String url) { if (null == throwable || null == url) { return ""; } String throwableName = throwable.getClass().getSimpleName(); return throwableName + "-" + url; } } /** * A {@link Runnable} used to retrieve JMX data from a remote URL endpoint. * There is no need for a {@link Callable} here since the * {@link MetricsRetrievalService} doesn't care about when the value returns or * whether an exception is thrown. */ private static final class JMXRunnable extends MetricRunnable { private final ObjectReader m_jmxObjectReader; private final Cache<String, JMXMetricHolder> m_cache; /** * Constructor. * * @param cache * @param queuedUrls * @param ttlUrlCache * @param jmxObjectReader * @param streamProvider * @param jmxUrl */ private JMXRunnable(Cache<String, JMXMetricHolder> cache, Set<String> queuedUrls, Cache<String, String> ttlUrlCache, ObjectReader jmxObjectReader, StreamProvider streamProvider, String jmxUrl) { super(streamProvider, jmxUrl, queuedUrls, ttlUrlCache); m_cache = cache; m_jmxObjectReader = jmxObjectReader; } /** * {@inheritDoc} */ @Override protected void removeCachedMetricsForCurrentURL() { m_cache.invalidate(m_url); } /** * {@inheritDoc} */ @Override protected void processInputStreamAndCacheResult(InputStream inputStream) throws Exception { JMXMetricHolder jmxMetricHolder = m_jmxObjectReader.readValue(inputStream); m_cache.put(m_url, jmxMetricHolder); } } /** * A {@link Runnable} used to retrieve REST data from a remote URL endpoint. * There is no need for a {@link Callable} here since the * {@link MetricsRetrievalService} doesn't care about when the value returns * or whether an exception is thrown. */ private static final class RESTRunnable extends MetricRunnable { private final Gson m_gson; private final Cache<String, Map<String, String>> m_cache; /** * Constructor. * * @param cache * @param queuedUrls * @param ttlUrlCache * @param gson * @param streamProvider * @param restUrl */ private RESTRunnable(Cache<String, Map<String, String>> cache, Set<String> queuedUrls, Cache<String, String> ttlUrlCache, Gson gson, StreamProvider streamProvider, String restUrl) { super(streamProvider, restUrl, queuedUrls, ttlUrlCache); m_cache = cache; m_gson = gson; } /** * {@inheritDoc} */ @Override protected void removeCachedMetricsForCurrentURL() { m_cache.invalidate(m_url); } /** * {@inheritDoc} */ @Override protected void processInputStreamAndCacheResult(InputStream inputStream) throws Exception { Type type = new TypeToken<Map<Object, Object>>() {}.getType(); JsonReader jsonReader = new JsonReader( new BufferedReader(new InputStreamReader(inputStream))); Map<String, String> jsonMap = m_gson.fromJson(jsonReader, type); m_cache.put(m_url, jsonMap); } } /** * A default exception handler. */ private static final class MetricRunnableExceptionHandler implements UncaughtExceptionHandler { /** * {@inheritDoc} */ @Override public void uncaughtException(Thread t, Throwable e) { LOG.error("Asynchronous metric retrieval encountered an exception with thread {}", t, e); } } }