/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ambari.server.controller.metrics; import java.util.Collections; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import org.apache.ambari.server.configuration.Configuration; import org.apache.ambari.server.controller.internal.AbstractPropertyProvider; import org.apache.ambari.server.controller.internal.PropertyInfo; import org.apache.ambari.server.controller.jmx.JMXPropertyProvider; import org.apache.ambari.server.controller.spi.Predicate; import org.apache.ambari.server.controller.spi.PropertyProvider; import org.apache.ambari.server.controller.spi.Request; import org.apache.ambari.server.controller.spi.Resource; import org.apache.ambari.server.controller.spi.SystemException; import org.apache.ambari.server.controller.utilities.BufferedThreadPoolExecutorCompletionService; import org.apache.ambari.server.controller.utilities.ScalingThreadPoolExecutor; import com.google.common.base.Throwables; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.inject.Inject; /** * Unites common functionality for multithreaded metrics providers (JMX and REST * as of now). Shares the same pool of executor threads across all * implementations. * <p/> * <b>This {@link PropertyProvider} should not be mistaken for a way to perform * expensive operations, as it is still called as part of the incoming REST * Jetty request.</b> It is poor design to have UI threads from the web client * waiting on expensive operations from a {@link PropertyProvider}, even if they * are spread across multiple threads. * <p/> * Instead, this {@link PropertyProvider} is useful for spreading many small, * quick operations across a threadpool. This is why the known implementations * of this class (such as the {@link JMXPropertyProvider}) use a cache instead * of reaching out to network endpoints on their own. * <p/> * This is also why the {@link ThreadPoolExecutor} used here has an unbounded * worker queue and essentially a fixed core size to perform its work. When * {@link Callable}s are rejected because of a worker queue exhaustion, they are * never submitted for execution, yet the {@link Future} instance is still * returned. Therefore, if the queue is ever exhausted, incoming REST API * requests must wait the entire {@link CompletionService#poll(long, TimeUnit)} * timeout before skipping the result and returning control. * */ public abstract class ThreadPoolEnabledPropertyProvider extends AbstractPropertyProvider { protected static Configuration configuration; /** * Host states that make available metrics collection */ public static final Set<String> healthyStates = Collections.singleton("STARTED"); protected final String hostNamePropertyId; private final MetricHostProvider metricHostProvider; private final String clusterNamePropertyId; /** * Executor service is shared between all instances. */ private static ThreadPoolExecutor EXECUTOR_SERVICE; private static int THREAD_POOL_CORE_SIZE; private static int THREAD_POOL_MAX_SIZE; private static int THREAD_POOL_WORKER_QUEUE_SIZE; private static long COMPLETION_SERVICE_POLL_TIMEOUT; private static final long THREAD_POOL_TIMEOUT_MILLIS = 30000L; @Inject public static void init(Configuration configuration) { THREAD_POOL_CORE_SIZE = configuration.getPropertyProvidersThreadPoolCoreSize(); THREAD_POOL_MAX_SIZE = configuration.getPropertyProvidersThreadPoolMaxSize(); THREAD_POOL_WORKER_QUEUE_SIZE = configuration.getPropertyProvidersWorkerQueueSize(); COMPLETION_SERVICE_POLL_TIMEOUT = configuration.getPropertyProvidersCompletionServiceTimeout(); EXECUTOR_SERVICE = initExecutorService(); } private static final Cache<String, Throwable> exceptionsCache = CacheBuilder.newBuilder() .expireAfterWrite(5, TimeUnit.MINUTES) .build(); // ----- Constructors ------------------------------------------------------ /** * Construct a provider. * * @param componentMetrics map of metrics for this provider */ public ThreadPoolEnabledPropertyProvider(Map<String, Map<String, PropertyInfo>> componentMetrics, String hostNamePropertyId, MetricHostProvider metricHostProvider, String clusterNamePropertyId) { super(componentMetrics); this.hostNamePropertyId = hostNamePropertyId; this.metricHostProvider = metricHostProvider; this.clusterNamePropertyId = clusterNamePropertyId; } // ----- Thread pool ------------------------------------------------------- /** * Generates thread pool with default parameters */ private static ThreadPoolExecutor initExecutorService() { ThreadPoolExecutor threadPoolExecutor = new ScalingThreadPoolExecutor( THREAD_POOL_CORE_SIZE, THREAD_POOL_MAX_SIZE, THREAD_POOL_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS, THREAD_POOL_WORKER_QUEUE_SIZE); threadPoolExecutor.allowCoreThreadTimeOut(true); ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true).setNameFormat( "ambari-property-provider-thread-%d").build(); threadPoolExecutor.setThreadFactory(threadFactory); return threadPoolExecutor; } // ----- Common PropertyProvider implementation details -------------------- @Override public Set<Resource> populateResources(Set<Resource> resources, Request request, Predicate predicate) throws SystemException { if(!checkAuthorizationForMetrics(resources, clusterNamePropertyId)) { return resources; } // Get a valid ticket for the request. final Ticket ticket = new Ticket(); // in most cases, the buffered completion service will not be utlized for // its advantages since the worker queue is unbounded. However, if is // configured with a boundary, then the buffered service ensures that no // requests are discarded. final CompletionService<Resource> completionService = new BufferedThreadPoolExecutorCompletionService<>(EXECUTOR_SERVICE); // In a large cluster we could have thousands of resources to populate here. // Distribute the work across multiple threads. for (Resource resource : resources) { completionService.submit( getPopulateResourceCallable(resource, request, predicate, ticket)); } Set<Resource> keepers = new HashSet<>(); try { for (int i = 0; i < resources.size(); ++i) { Future<Resource> resourceFuture = completionService.poll(COMPLETION_SERVICE_POLL_TIMEOUT, TimeUnit.MILLISECONDS); if (resourceFuture == null) { // its been more than the populateTimeout since the last callable // completed ... // invalidate the ticket to abort the threads and don't wait any // longer ticket.invalidate(); LOG.error("Timed out after waiting {}ms waiting for request {}", COMPLETION_SERVICE_POLL_TIMEOUT, request); // stop iterating break; } // future should already be completed... no need to wait on get Resource resource = resourceFuture.get(); if (resource != null) { keepers.add(resource); } } } catch (InterruptedException e) { logException(e); } catch (ExecutionException e) { rethrowSystemException(e.getCause()); } return keepers; } /** * Get a callable that can be used to populate the given resource. * * @param resource the resource to be populated * @param request the request * @param predicate the predicate * @param ticket a valid ticket * * @return a callable that can be used to populate the given resource */ private Callable<Resource> getPopulateResourceCallable( final Resource resource, final Request request, final Predicate predicate, final Ticket ticket) { return new Callable<Resource>() { @Override public Resource call() throws SystemException { return populateResource(resource, request, predicate, ticket); } }; } /** * Populate a resource by obtaining the requested JMX properties. * * @param resource the resource to be populated * @param request the request * @param predicate the predicate * @return the populated resource; null if the resource should NOT be part of the result set for the given predicate */ protected abstract Resource populateResource(Resource resource, Request request, Predicate predicate, Ticket ticket) throws SystemException; /** * Set the populate timeout value for this provider. * * @param populateTimeout the populate timeout value */ protected void setPopulateTimeout(long populateTimeout) { COMPLETION_SERVICE_POLL_TIMEOUT = populateTimeout; } // ----- helper methods ---------------------------------------------------- /** * Determine whether or not the given property id was requested. */ protected static boolean isRequestedPropertyId(String propertyId, String requestedPropertyId, Request request) { return request.getPropertyIds().isEmpty() || propertyId.startsWith(requestedPropertyId); } protected static String getCacheKeyForException(final Throwable throwable) { if (throwable == null) { return ""; } StringBuilder sb = new StringBuilder(); for (Throwable t : Throwables.getCausalChain(throwable)) { if (t != null) { sb.append(t.getClass().getName()); } sb.append('\n'); } return sb.toString(); } /** * Log an error for the given exception. * * @param throwable the caught exception * * @return the error message that was logged */ protected static String logException(final Throwable throwable) { final String msg = "Caught exception getting metrics : " + throwable.getLocalizedMessage(); // JsonParseException includes InputStream's hash code into the message. // getMessage and printStackTrace returns a different String every time. String cacheKey = getCacheKeyForException(throwable); if (LOG.isDebugEnabled()) { LOG.debug(msg, throwable); } else { try { exceptionsCache.get(cacheKey, new Callable<Throwable>() { @Override public Throwable call() { LOG.error(msg + ", skipping same exceptions for next 5 minutes", throwable); return throwable; } }); } catch (ExecutionException ignored) { } } return msg; } /** * Rethrow the given exception as a System exception and log the message. * * @param throwable the caught exception * * @throws org.apache.ambari.server.controller.spi.SystemException always around the given exception */ protected static void rethrowSystemException(Throwable throwable) throws SystemException { String msg = logException(throwable); if (throwable instanceof SystemException) { throw (SystemException) throwable; } throw new SystemException (msg, throwable); } /** * Returns a hostname for component */ public String getHost(Resource resource, String clusterName, String componentName) throws SystemException { return hostNamePropertyId == null ? metricHostProvider.getHostName(clusterName, componentName) : (String) resource.getPropertyValue(hostNamePropertyId); } /** * Get complete URL from parts */ protected String getSpec(String protocol, String hostName, String port, String url) { return protocol + "://" + hostName + ":" + port + url; } // ----- inner class : Ticket ---------------------------------------------- /** * Ticket used to cancel provider threads. The provider threads should * monitor the validity of the passed in ticket and bail out if it becomes * invalid (as in a timeout). */ protected static class Ticket { /** * Indicate whether or not the ticket is valid. */ private volatile boolean valid = true; /** * Invalidate the ticket. */ public void invalidate() { valid = false; } /** * Determine whether or not this ticket is valid. * * @return true if the ticket is valid */ public boolean isValid() { return valid; } } }