/* * NOTE: This copyright does *not* cover user programs that use Hyperic * program services by normal system calls through the application * program interfaces provided as part of the Hyperic Plug-in Development * Kit or the Hyperic Client Development Kit - this is merely considered * normal use of the program, and does *not* fall under the heading of * "derived work". * * Copyright (C) [2004-2010], VMware, Inc. * This file is part of Hyperic. * * Hyperic is free software; you can redistribute it and/or modify * it under the terms version 2 of the GNU General Public License as * published by the Free Software Foundation. This program is distributed * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A * PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA. */ package org.hyperic.hq.measurement; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.hyperic.hq.appdef.server.session.Platform; import org.hyperic.hq.appdef.shared.PlatformManager; import org.hyperic.hq.appdef.shared.PlatformNotFoundException; import org.hyperic.hq.authz.server.session.AuthzSubject; import org.hyperic.hq.authz.server.session.Resource; import org.hyperic.hq.authz.server.session.ResourceEdge; import org.hyperic.hq.authz.shared.AuthzSubjectManager; import org.hyperic.hq.authz.shared.PermissionException; import org.hyperic.hq.authz.shared.ResourceManager; import org.hyperic.hq.common.DiagnosticObject; import org.hyperic.hq.common.DiagnosticsLogger; import org.hyperic.hq.context.Bootstrap; import org.hyperic.hq.ha.HAUtil; import org.hyperic.hq.measurement.server.session.Measurement; import org.hyperic.hq.measurement.server.session.MeasurementTemplate; import org.hyperic.hq.measurement.server.session.MetricDataCache; import org.hyperic.hq.measurement.shared.AvailabilityManager; import org.hyperic.hq.measurement.shared.MeasurementManager; import org.hyperic.hq.product.MetricValue; import org.hyperic.util.TimeUtil; import org.hyperic.util.Transformer; import org.hyperic.util.timer.StopWatch; import org.springframework.beans.BeansException; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationContext; import org.springframework.context.ApplicationContextAware; import org.springframework.context.ApplicationListener; import org.springframework.context.event.ContextRefreshedEvent; import org.springframework.stereotype.Component; @Component("metricsNotComingInDiagnostic") public class MetricsNotComingInDiagnostic implements DiagnosticObject, ApplicationContextAware, ApplicationListener<ContextRefreshedEvent> { private final Log log = LogFactory.getLog(MetricsNotComingInDiagnostic.class); private AtomicLong last = new AtomicLong(now() - 1000*60*60*11); // 12 hours private static final long REPORT_THRESHOLD = 1000 * 60 * 60 * 12; private static final long VIOLATION_THRESHOLD = 1000 * 60 * 60; private static final Object LOCK = new Object(); private String lastVerboseStatus = null; private String lastNonVerboseStatus = null; private static String METRICSNOTCOMINGINDIAGNOSTIC_DISABLE = "MetricsNotComingInDiagnostic.disable"; private DiagnosticsLogger diagnosticsLogger; private AuthzSubjectManager authzSubjectManager; private AvailabilityManager availabilityManager; private MeasurementManager measurementManager; private ResourceManager resourceManager; private PlatformManager platformManager; private MetricDataCache metricDataCache; private ApplicationContext ctx; private AtomicBoolean disabled = new AtomicBoolean(false); @Autowired public MetricsNotComingInDiagnostic(DiagnosticsLogger diagnosticsLogger, AuthzSubjectManager authzSubjectManager, AvailabilityManager availabilityManager, MeasurementManager measurementManager, ResourceManager resourceManager, PlatformManager platformManager, MetricDataCache metricDataCache) { this.diagnosticsLogger = diagnosticsLogger; this.authzSubjectManager = authzSubjectManager; this.availabilityManager = availabilityManager; this.measurementManager = measurementManager; this.resourceManager = resourceManager; this.platformManager = platformManager; this.metricDataCache = metricDataCache; String isDisabled = System.getProperty(METRICSNOTCOMINGINDIAGNOSTIC_DISABLE, "false"); if (isDisabled.equals("false")) { setDisabled(false); } else { setDisabled(true); } } public String getName() { return "Enabled Metrics Not Coming In"; } public String getShortName() { return "EnabledMetricsNotComingIn"; } public String getStatus() { return getReport(true); } public String getShortStatus() { return getReport(false); } public Boolean getDisabled() { return disabled.get(); } public void setDisabled(Boolean disabled) { log.info("Setting disabled flag to " + disabled.toString()); this.disabled.set(disabled); } private String getReport(final boolean isVerbose) { if (!HAUtil.isMasterNode()) { return "Server must be the primary node in the HA configuration before this report is valid."; } if (getDisabled()) { return "Report disabled"; } if ((now() - last.get()) < REPORT_THRESHOLD) { synchronized (LOCK) { String rtn = (isVerbose) ? lastVerboseStatus : lastNonVerboseStatus; if (rtn == null) { return "report will not be executed until the server is up for 60 minutes\n"; } return rtn; } } final StringBuilder verbose = new StringBuilder(); final StringBuilder nonVerbose = new StringBuilder(); try { setStatusBuf(nonVerbose, verbose); } catch (Throwable e) { log.error(e, e); } finally { last.set(now()); synchronized (LOCK) { lastVerboseStatus = verbose.toString(); lastNonVerboseStatus = nonVerbose.toString(); } } return (isVerbose) ? lastVerboseStatus : lastNonVerboseStatus; } private void setStatusBuf(StringBuilder nonVerbose, StringBuilder verbose) { final boolean debug = log.isDebugEnabled(); final StopWatch watch = new StopWatch(); /* !!!PLEASE NOTE!!! * This diagostic pulls in a large amount of pojos when it runs. In order to alleviate * the potential of it crashing the server this method does not run in single session * anymore. The reason for this is so that memory may be freed up while the method is * resolving all the resources not reporting metrics. The code looks odd since it is * pulling in pojos using hibernate get() to avoid lazy session initialization issues, * so please be careful when modifying it. */ if (debug) watch.markTimeBegin("getAllPlatforms"); final Collection<Platform> platforms = getAllPlatforms(); if (debug) watch.markTimeEnd("getAllPlatforms"); if (debug) watch.markTimeBegin("getResources"); final Map<Integer, Resource> resources = getResources(platforms); if (debug) watch.markTimeEnd("getResources"); final List<Integer> resourceIds = getResourceIds(resources.values()); if (debug) watch.markTimeBegin("getAvailMeasurements"); final Map<Integer, List<Measurement>> measCache = measurementManager.getAvailMeasurements(resourceIds); if (debug) watch.markTimeEnd("getAvailMeasurements"); if (debug) watch.markTimeBegin("getLastPlatformAvail"); final Map<Integer, MetricValue> avails = availabilityManager.getLastAvail(resources.values(), measCache); if (debug) watch.markTimeEnd("getLastPlatformAvail"); final List<Resource> children = new ArrayList<Resource>(); if (debug) watch.markTimeBegin("getChildren"); final Map<Integer,Platform> childrenToPlatform = getChildren(platforms, measCache, avails, resources, children); if (debug) watch.markTimeEnd("getChildren"); final List<Integer> childrenIds = getResourceIds(children); if (debug) watch.markTimeBegin("filterOutNonAvailableResources"); filterOutNonAvailableResources(childrenIds, childrenToPlatform); if (debug) watch.markTimeEnd("filterOutNonAvailableResources"); if (debug) watch.markTimeBegin("getEnabledMeasurements"); final Collection<List<Measurement>> measurements = measurementManager.getEnabledNonAvailMeasurements(children).values(); if (debug) watch.markTimeEnd("getEnabledMeasurements"); if (debug) watch.markTimeBegin("getLastMetricValues"); final Map<Integer,MetricValue> values = getLastMetricValues(measurements); if (debug) watch.markTimeEnd("getLastMetricValues"); if (debug) watch.markTimeBegin("getStatus"); setStatus(measurements, values, avails, childrenToPlatform, resources, nonVerbose, verbose); if (debug) watch.markTimeEnd("getStatus"); if (debug) { log.debug("getStatus: " + watch + ", { Size: [measCache=" + measCache.size() + "] [lastAvails=" + avails.size() + "] [childrenToPlatform=" + childrenToPlatform.size() + "] [enabledMeasurements=" + measurements.size() + "] [lastMetricValues=" + values.size() + "] }"); } } private List<Integer> getResourceIds(Collection<Resource> resources) { return new Transformer<Resource, Integer>() { @Override public Integer transform(Resource r) { return r.getId(); } }.transform(resources); } private void filterOutNonAvailableResources(List<Integer> resources, Map<Integer, Platform> resourcesToPlatform) { final Map<Integer, List<Measurement>> measCache = measurementManager.getAvailMeasurements(resources); final Map<Integer, MetricValue> avails = availabilityManager.getLastAvail(resources, measCache); for (final Iterator<Integer> it=resources.iterator(); it.hasNext(); ) { final Integer childId = it.next(); if (!resourceIsAvailable(childId, measCache, avails)) { it.remove(); resourcesToPlatform.remove(childId); } } } private void setStatus(Collection<List<Measurement>> measurementLists, Map<Integer, MetricValue> values, Map<Integer, MetricValue> avails, Map<Integer, Platform> childrenToPlatform, Map<Integer, Resource> resources, StringBuilder nonVerbose, StringBuilder verbose) { final Map<Platform, List<String>> platHierarchyNotReporting = new HashMap<Platform, List<String>>(); for (final List<Measurement> mList : measurementLists) { for (Measurement m : mList) { if (m != null && !values.containsKey(m.getId())) { final Platform platform = childrenToPlatform.get(m.getResource().getId()); if (platform == null) { continue; } List<String> tmp; if (null == (tmp = platHierarchyNotReporting.get(platform))) { tmp = new ArrayList<String>(); platHierarchyNotReporting.put(platform, tmp); } List<String> list = tmp; Resource res = resources.get(m.getResource().getId()); // res should not be null, but just in case if (res == null) { res = resourceManager.getResourceById(m.getResource().getId()); } MeasurementTemplate template = measurementManager.getTemplatesByMeasId(m.getId()); String templateName = (template == null) ? "UNKNOWN" : template.getName(); list.add(new StringBuilder(128) .append("\nmid=").append(m.getId()) .append(", name=").append(templateName) .append(", resid=").append(m.getResource().getId()) .append(", resname=").append(res.getName()) .toString()); } } } verbose.append("\nReport generated at ").append(TimeUtil.toString(now())) .append("\nEnabled metrics not reported in for ") .append(VIOLATION_THRESHOLD / 1000 / 60) .append(" minutes (by platform hierarchy)\n"); nonVerbose.append("\nReport generated at ").append(TimeUtil.toString(now())) .append("\nEnabled metrics not reported in for ") .append(VIOLATION_THRESHOLD / 1000 / 60) .append(" minutes (by platform hierarchy)\n"); verbose.append("------------------------------------------------------------------------\n"); nonVerbose.append("------------------------------------------------------------------------\n"); for (final Entry<Platform, List<String>> entry : platHierarchyNotReporting.entrySet()) { final Platform platform = entry.getKey(); verbose.append("\nfqdn=").append(platform.getFqdn()).append(" ("); nonVerbose.append("\nfqdn=").append(platform.getFqdn()).append(" ("); final List<String> children = (List<String>) entry.getValue(); // verbose verbose.append(children.size()); verbose.append(" not collecting):"); for (String xx : children) { verbose.append(xx); } // non verbose nonVerbose.append(children.size()); nonVerbose.append(" not collecting)"); } verbose.append("\n"); nonVerbose.append("\n"); } /** * @return {@link Map} of {@link Integer}s of resourceIds to their top level {@link Platform} */ private Map<Integer, Platform> getChildren(Collection<Platform> platforms, Map<Integer, List<Measurement>> measCache, Map<Integer, MetricValue> avails, Map<Integer, Resource> resourceMap, List<Resource> children) { final Map<Integer, Platform> rtn = new HashMap<Integer, Platform>(); final long now = now(); for (final Platform platform : platforms) { if ((now - platform.getCreationTime()) < VIOLATION_THRESHOLD || !measCache.containsKey(platform.getResource().getId()) || !resourceIsAvailable(platform.getResource().getId(), measCache, avails)) { resourceMap.remove(platform.getResource().getId()); continue; } } final Collection<ResourceEdge> edges = resourceManager.findResourceEdges( resourceManager.getContainmentRelation(), new ArrayList<Resource>(resourceMap.values())); for (final ResourceEdge edge : edges) { try { Resource from = resourceMap.get(edge.getFrom().getId()); // res should not be null, but just in case if (from == null) { from = resourceManager.getResourceById(edge.getFrom().getId()); } final Platform platform = platformManager.findPlatformById(from.getInstanceId()); final Resource child = resourceManager.getResourceById(edge.getTo().getId()); if (child == null || child.isInAsyncDeleteState()) { continue; } resourceMap.put(child.getId(), child); children.add(child); rtn.put(child.getId(), platform); } catch (PlatformNotFoundException e) { log.debug(e); } } return rtn; } private Map<Integer, Resource> getResources(Collection<Platform> platforms) { final Map<Integer, Resource> resources = new HashMap<Integer, Resource>(platforms.size()); for (final Platform platform : platforms) { final Integer resId = platform.getResource().getId(); final Resource r = resourceManager.getResourceById(resId); if (r == null || r.isInAsyncDeleteState()) { continue; } resources.put(resId, r); } return resources; } /** * @return {@link Map} of {@link Integer} of measurementIds to * {@link MetricValue} */ private Map<Integer, MetricValue> getLastMetricValues(Collection<List<Measurement>> measLists) { final List<Integer> mids = new ArrayList<Integer>(); for (final List<Measurement> measList : measLists) { for (final Measurement m : measList) { mids.add(m.getId()); } } return metricDataCache.getAll(mids, now() - VIOLATION_THRESHOLD); } private static long now() { return System.currentTimeMillis(); } private boolean resourceIsAvailable(Integer resourceId, Map<Integer, List<Measurement>> measCache, Map<Integer, MetricValue> avails) { final List<Measurement> measurements = measCache.get(resourceId); if (measurements == null || measurements.isEmpty()) { return false; } final Measurement availMeas = (Measurement) measurements.get(0); MetricValue val = avails.get(availMeas.getId()); return (val.getValue() == MeasurementConstants.AVAIL_DOWN) ? false : true; } private Collection<Platform> getAllPlatforms() { AuthzSubject overlord = authzSubjectManager.getOverlordPojo(); try { return platformManager.findAll(overlord); } catch (PermissionException e) { log.error(e,e); return Collections.emptyList(); } } public void onApplicationEvent(ContextRefreshedEvent event) { if (event.getApplicationContext() != ctx) { return; } diagnosticsLogger.addDiagnosticObject((DiagnosticObject) Bootstrap.getBean("metricsNotComingInDiagnostic")); } public void setApplicationContext(ApplicationContext applicationContext) throws BeansException { this.ctx = applicationContext; } }