/***************************************************************************
 * Copyright (c) 2013 VMware, Inc. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ***************************************************************************/

package com.vmware.vhadoop.vhm;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.vmware.vhadoop.api.vhm.ClusterMap.ExtraInfoToClusterMapper;
import com.vmware.vhadoop.api.vhm.ClusterMapReader;
import com.vmware.vhadoop.api.vhm.ExecutionStrategy;
import com.vmware.vhadoop.api.vhm.HealthMonitor;
import com.vmware.vhadoop.api.vhm.QueueClient.CannotConnectException;
import com.vmware.vhadoop.api.vhm.VCActions;
import com.vmware.vhadoop.api.vhm.VHMCollaborator;
import com.vmware.vhadoop.api.vhm.events.ClusterHealthEvent;
import com.vmware.vhadoop.api.vhm.events.ClusterScaleCompletionEvent;
import com.vmware.vhadoop.api.vhm.events.ClusterScaleEvent;
import com.vmware.vhadoop.api.vhm.events.ClusterStateChangeEvent;
import com.vmware.vhadoop.api.vhm.events.EventConsumer;
import com.vmware.vhadoop.api.vhm.events.EventInjector;
import com.vmware.vhadoop.api.vhm.events.EventProducer;
import com.vmware.vhadoop.api.vhm.events.NotificationEvent;
import com.vmware.vhadoop.api.vhm.strategy.ScaleStrategy;
import com.vmware.vhadoop.api.vhm.strategy.ScaleStrategy.VMChooserCallback;
import com.vmware.vhadoop.api.vhm.strategy.VMChooser;
import com.vmware.vhadoop.util.ExternalizedParameters;
import com.vmware.vhadoop.util.ThreadLocalCompoundStatus;
import com.vmware.vhadoop.util.VhmLevel;
import com.vmware.vhadoop.vhm.events.AbstractClusterScaleEvent;
import com.vmware.vhadoop.vhm.events.AbstractNotificationEvent;
import com.vmware.vhadoop.vhm.events.ClusterScaleDecision;
import com.vmware.vhadoop.vhm.events.ClusterUpdateEvent;
import com.vmware.vhadoop.vhm.events.NewVmEvent;
import com.vmware.vhadoop.vhm.events.SerengetiLimitInstruction;
import com.vmware.vhadoop.vhm.events.SerengetiLimitInstruction.SerengetiLimitAction;
import com.vmware.vhadoop.vhm.events.VmRemovedFromClusterEvent;
import com.vmware.vhadoop.vhm.events.VmUpdateEvent;
import com.vmware.vhadoop.vhm.strategy.ManualScaleStrategy;

public class VHM implements EventConsumer {
   private final EventProducerActions _eventProducers;
   private final Queue<NotificationEvent> _eventQueue;
   private boolean _initialized;
   private final AbstractClusterMap _clusterMap;
   private final ExecutionStrategy _executionStrategy;
   private final VCActions _vcActions;
   private final MultipleReaderSingleWriterClusterMapAccess _clusterMapAccess;
   private final ClusterMapReader _parentClusterMapReader;
   private final Set<VMChooser> _vmChoosers;
   private final Set<EventInjector> _eventInjectors;
   private HealthMonitor _healthMonitor;
   private volatile boolean _running = false;
   private volatile boolean _stopped = true;

   private static final Logger _log = Logger.getLogger(VHM.class.getName());

   private static final long CLUSTER_COMPLETENESS_GRACE_TIME_MILLIS = ExternalizedParameters.get().getLong("CLUSTER_COMPLETENESS_GRACE_TIME_MILLIS");
   private static long EVENT_PRODUCER_START_GRACE_TIME_MILLIS = ExternalizedParameters.get().getLong("EVENT_PRODUCER_START_GRACE_TIME_MILLIS");
   private static long EVENT_PRODUCER_STOP_GRACE_TIME_MILLIS = ExternalizedParameters.get().getLong("EVENT_PRODUCER_STOP_GRACE_TIME_MILLIS");

   VHM(VCActions vcActions, ScaleStrategy[] scaleStrategies, ExtraInfoToClusterMapper strategyMapper, ThreadLocalCompoundStatus threadLocalStatus) {
      _eventProducers = new EventProducerActions();
      _eventQueue = new LinkedList<NotificationEvent>();
      _initialized = true;
      _clusterMap = new CachingClusterMapImpl(strategyMapper);
      _vcActions = vcActions;
      _clusterMapAccess = MultipleReaderSingleWriterClusterMapAccess.getClusterMapAccess(_clusterMap);
      _parentClusterMapReader = new AbstractClusterMapReader(_clusterMapAccess, threadLocalStatus) {};
      initScaleStrategies(scaleStrategies);
      _executionStrategy = new ThreadPoolExecutionStrategy();
      if ((_executionStrategy instanceof EventProducer) && !registerEventProducer((EventProducer)_executionStrategy)) {
         throw new RuntimeException("Fatal error registering ThreadPoolExecutionStrategy as an event producer");
      }
      _vmChoosers = new HashSet<VMChooser>();
      _eventInjectors = new HashSet<EventInjector>();
   }

   public void registerHealthMonitor(HealthMonitor healthMonitor) {
      _healthMonitor = healthMonitor;
   }

   public void registerCollaborator(VHMCollaborator collaborator) {
      if (collaborator instanceof VMChooser) {
         _vmChoosers.add((VMChooser)collaborator);
      }
      if (collaborator instanceof EventInjector) {
         _eventInjectors.add((EventInjector)collaborator);
      }
      if (collaborator instanceof ClusterMapReader) {
         ((ClusterMapReader)collaborator).initialize(_parentClusterMapReader);
      }
   }
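   /* Illustrative sketch of collaborator registration (ExampleChooser is a hypothetical
    * class, assumed to implement both VMChooser and ClusterMapReader): a collaborator is
    * routed through every branch above that matches one of its interfaces.
    *
    *    VHMCollaborator collaborator = new ExampleChooser();
    *    vhm.registerCollaborator(collaborator);   // added to _vmChoosers and initialized
    *                                              // with the parent ClusterMapReader
    */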
   private void initScaleStrategies(ScaleStrategy[] scaleStrategies) {
      for (ScaleStrategy strategy : scaleStrategies) {
         _clusterMap.registerScaleStrategy(strategy);
         strategy.setVMChooserCallback(new VMChooserCallback() {
            @Override
            public Set<VMChooser> getVMChoosers() {
               return _vmChoosers;
            }

            @Override
            public VMChooser getVMChooserForType(Class<? extends VMChooser> vmChooserType) {
               for (VMChooser vmChooser : _vmChoosers) {
                  if (vmChooserType.isAssignableFrom(vmChooser.getClass())) {
                     return vmChooser;
                  }
               }
               return null;
            }
         });
         strategy.initialize(_parentClusterMapReader);
      }
   }

   private class EventProducerResetEvent extends AbstractNotificationEvent {
      EventProducerResetEvent() {
         super(false, false);
      }
   }

   /* Threading Requirements:
    * - Protect the _eventProducers collection from changing while another thread is iterating over it
    * - Treat stop(), start() and reset() as blocking atomic operations
    * - Allow threads to figure out the state of the event producers without blocking
    */
   private class EventProducerActions {
      private final Set<EventProducer> _eventProducers = new HashSet<EventProducer>();
      private final Set<EventProducer> _startedProducers = Collections.synchronizedSet(new HashSet<EventProducer>());   /* DO NOT ITERATE */
      private final EventProducer.EventProducerStartStopCallback _startStopHandler = new EventProducerStartStopHandler();

      private class EventProducerStartStopHandler implements EventProducer.EventProducerStartStopCallback {
         @Override
         public void notifyFailed(EventProducer thisProducer) {
            _log.severe("VHM: "+thisProducer.getClass().getName()+" - event producer stopped unexpectedly, so resetting event producers");
            placeEventOnQueue(new EventProducerResetEvent());
         }

         @Override
         public void notifyStarted(EventProducer thisProducer) {
            _startedProducers.add(thisProducer);
            _log.fine("Total started event producers = "+_startedProducers.size());
         }

         @Override
         public void notifyStopped(EventProducer thisProducer) {
            _startedProducers.remove(thisProducer);
            _log.fine("Total started event producers = "+_startedProducers.size());
         }
      }

      /* Block until the event producers to be stopped have actually stopped */
      synchronized boolean stop() {
         boolean result;
         Set<EventProducer> waitForStop = new HashSet<EventProducer>();
         for (EventProducer eventProducer : _eventProducers) {
            if (_startedProducers.contains(eventProducer)) {
               eventProducer.stop();
               waitForStop.add(eventProducer);
            }
         }
         result = waitForStateChange(waitForStop, false, EVENT_PRODUCER_STOP_GRACE_TIME_MILLIS);
         _log.fine("Event producers stop returning "+result);
         return result;
      }

      /* Block until the event producers to be started have actually started */
      synchronized boolean start() {
         boolean result;
         Set<EventProducer> waitForStart = new HashSet<EventProducer>();
         for (EventProducer eventProducer : _eventProducers) {
            if (!_startedProducers.contains(eventProducer)) {
               eventProducer.start(_startStopHandler);
               waitForStart.add(eventProducer);
            }
         }
         result = waitForStateChange(waitForStart, true, EVENT_PRODUCER_START_GRACE_TIME_MILLIS);
         _log.fine("Event producers start returning "+result);
         return result;
      }

      /* Poll _startedProducers until every producer in the set has reached the desired
       * state or the timeout elapses; returns true if the state change was observed */
      private boolean waitForStateChange(Set<EventProducer> producers, boolean waitForStart, long timeoutMillis) {
         boolean done;
         int timeoutCountdown = (int)timeoutMillis;
         final int sleepTimeMillis = 100;
         do {
            done = true;
            for (EventProducer producer : producers) {
               if (_startedProducers.contains(producer) != waitForStart) {
                  done = false;
                  break;
               }
            }
            if (!done) {
               try {
                  Thread.sleep(sleepTimeMillis);
               } catch (InterruptedException e) {}
            }
         } while (!done && ((timeoutCountdown -= sleepTimeMillis) > 0));
         return done;
      }

      /* Block until the event producers have stopped and then restarted */
      synchronized boolean reset() {
         if (stop()) {
            if (start()) {
               _log.fine("Event producers successfully restarted");
               return true;
            } else {
               _log.warning("VHM: event producers start failed during reset");
            }
         } else {
            _log.warning("VHM: event producers stop failed during reset");
         }
         return false;
      }
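      /* Illustrative lifecycle sketch, not executable here (producer is an assumed
       * EventProducer implementation): state changes flow through the callback passed
       * to start(), and stop()/start()/reset() poll _startedProducers via
       * waitForStateChange() until the change is visible or the grace period elapses.
       *
       *    producer.start(_startStopHandler);   // producer eventually calls notifyStarted(this)
       *    producer.stop();                     // producer eventually calls notifyStopped(this)
       *    // an unexpected producer failure triggers notifyFailed(this), which queues an
       *    // EventProducerResetEvent; the main loop reacts by calling reset()
       */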
      synchronized boolean registerNew(EventProducer eventProducer) {
         boolean result;
         _eventProducers.add(eventProducer);
         eventProducer.registerEventConsumer(VHM.this);
         if (eventProducer instanceof ClusterMapReader) {
            ((ClusterMapReader)eventProducer).initialize(_parentClusterMapReader);
         }
         eventProducer.start(_startStopHandler);
         /* Block until this newly-registered producer has actually started */
         Set<EventProducer> singleItemSet = new HashSet<EventProducer>();
         singleItemSet.add(eventProducer);
         result = waitForStateChange(singleItemSet, true, EVENT_PRODUCER_START_GRACE_TIME_MILLIS);
         _log.fine("Event producer "+eventProducer.getClass().getName()+" registered. Start result = "+result);
         return result;
      }

      boolean isAllStopped() {
         return _startedProducers.isEmpty();
      }
   }

   private boolean checkForProducerReset(Set<NotificationEvent> events) {
      List<NotificationEvent> toRemove = null;
      for (NotificationEvent notificationEvent : events) {
         if (notificationEvent instanceof EventProducerResetEvent) {
            if (toRemove == null) {
               toRemove = new ArrayList<NotificationEvent>();
            }
            toRemove.add(notificationEvent);
         }
      }
      if (toRemove != null) {
         _log.info("Event Producer reset requested...");
         events.removeAll(toRemove);
         return true;
      }
      return false;
   }

   public boolean registerEventProducer(EventProducer eventProducer) {
      return _eventProducers.registerNew(eventProducer);
   }

   private void addEventToQueue(NotificationEvent event) {
      Queue<NotificationEvent> toKeepQueue = null;
      if (event.getCanClearQueue()) {
         for (NotificationEvent e : _eventQueue) {
            if (!e.getCanBeClearedFromQueue()) {
               if (toKeepQueue == null) {
                  toKeepQueue = new LinkedList<NotificationEvent>();
               }
               toKeepQueue.add(e);
            }
         }
         _eventQueue.clear();
      }
      _eventQueue.add(event);
      if (toKeepQueue != null) {
         _eventQueue.addAll(toKeepQueue);
      }
   }

   /* This can be called by multiple threads */
   @Override
   public void placeEventOnQueue(NotificationEvent event) {
      if (!_initialized) {
         return;
      }
      if (event != null) {
         synchronized(_eventQueue) {
            addEventToQueue(event);
            _eventQueue.notify();
         }
      }
   }

   /* This bypasses the injected derived events processing, which should only see new events */
   private void requeueExistingEvents(List<? extends NotificationEvent> events) {
      if (!_initialized) {
         return;
      }
      synchronized(_eventQueue) {
         for (NotificationEvent event : events) {
            addEventToQueue(event);
         }
         _eventQueue.notify();
      }
   }

   @Override
   public void placeEventCollectionOnQueue(List<? extends NotificationEvent> events) {
      if (!_initialized) {
         return;
      }
      synchronized(_eventQueue) {
         for (NotificationEvent event : events) {
            addEventToQueue(event);
         }
         _eventQueue.notify();
      }
   }
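   /* Worked example of the queue-clearing semantics in addEventToQueue (events A, B, C
    * are hypothetical): with a queue of [A, B] where only B returns false from
    * getCanBeClearedFromQueue(), adding C with getCanClearQueue() == true yields [C, B] -
    * A is evicted, C goes to the front, and B is re-appended because it cannot be cleared.
    */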
   public Set<NotificationEvent> pollForEvents() {
      Set<NotificationEvent> results = null;
      synchronized(_eventQueue) {
         while (_eventQueue.peek() == null) {
            try {
               _eventQueue.wait();
            } catch (InterruptedException e) {
               _log.warning("VHM: interrupted unexpectedly while waiting for event");
            }
         }
         results = new LinkedHashSet<NotificationEvent>();
         while (_eventQueue.peek() != null) {
            /* Use of a Set ensures duplicates are eliminated */
            /* TODO: add an event key to do event consolidation.
               At the moment events use the default equality, so this has little effect */
            results.add(_eventQueue.poll());
         }
      }
      return results;
   }

   public NotificationEvent getEventPending() {
      synchronized(_eventQueue) {
         return _eventQueue.peek();
      }
   }

   /* TODO: Note that currently, this method cannot deal with a ClusterScaleEvent that has just a hostId.
    * We should be able to deal with this at some point - i.e., general host contention impacts multiple clusters */
   private String completeClusterScaleEventDetails(AbstractClusterScaleEvent event) {
      String clusterId = event.getClusterId();
      if ((clusterId == null) && (event instanceof SerengetiLimitInstruction)) {
         String clusterName = ((SerengetiLimitInstruction)event).getClusterName();
         if (clusterName != null) {
            clusterId = _clusterMap.getClusterIdForName(clusterName);
         }
      }
      if (clusterId == null) {
         /* Find the clusterId from the VM */
         String hostId = event.getHostId();
         String vmId = event.getVmId();
         /* Find the host if it has not been provided */
         if (hostId == null) {
            if (vmId != null) {
               hostId = _clusterMap.getHostIdForVm(vmId);
               event.setHostId(hostId);
            }
         }
         if (vmId != null) {
            clusterId = _clusterMap.getClusterIdForVm(vmId);
         } else {
            _log.warning("<%C"+event.getClusterId()+"%C>: no usable data from ClusterScaleEvent ("
                  + event.getVmId() + "," + event.getHostId() + "," + event.getClusterId() + ")");
            if (event instanceof SerengetiLimitInstruction) {
               SerengetiLimitInstruction sEvent = (SerengetiLimitInstruction)event;
               _log.warning(sEvent.getClusterName()+": unusable scale event is a serengeti limit instruction");
            }
            _clusterMap.dumpState(Level.WARNING);
         }
      }
      event.setClusterId(clusterId);
      return clusterId;
   }

   private void updateOrCreateClusterScaleEventSet(String clusterId, ClusterScaleEvent newEvent, Map<String, Set<ClusterScaleEvent>> clusterScaleEventMap) {
      Set<ClusterScaleEvent> clusterScaleEvents = clusterScaleEventMap.get(clusterId);
      if (clusterScaleEvents == null) {
         clusterScaleEvents = new LinkedHashSet<ClusterScaleEvent>();   /* Preserve order */
         clusterScaleEventMap.put(clusterId, clusterScaleEvents);
      }
      clusterScaleEvents.add(newEvent);
   }
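   /* The get-or-create pattern above could be written with Map.computeIfAbsent, as in
    * the minimal sketch below; that assumes Java 8+, so the explicit form is used here:
    *
    *    clusterScaleEventMap.computeIfAbsent(clusterId, k -> new LinkedHashSet<ClusterScaleEvent>())
    *                        .add(newEvent);
    */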
   /* This method takes all new events polled from the event queue, pulls out any ClusterScaleEvents and organizes them by cluster */
   private void getQueuedScaleEventsForCluster(Set<NotificationEvent> events, Map<String, Set<ClusterScaleEvent>> clusterScaleEventMap) {
      if (clusterScaleEventMap != null) {
         for (NotificationEvent event : events) {
            if (event instanceof AbstractClusterScaleEvent) {
               /* Derive the cluster ID and other details if the event does not already have them */
               String clusterId = completeClusterScaleEventDetails((AbstractClusterScaleEvent)event);
               if (clusterId != null) {
                  updateOrCreateClusterScaleEventSet(clusterId, (ClusterScaleEvent)event, clusterScaleEventMap);
               } else if (event instanceof SerengetiLimitInstruction) {
                  try {
                     ((SerengetiLimitInstruction)event).reportError("Unable to resolve cluster ID from vCenter");
                  } catch (CannotConnectException e) {
                     _log.warning("Failed to report error back on rabbit queue: "+e.getCause().getMessage());
                  }
               }
            }
         }
      }
   }

   private Set<ClusterStateChangeEvent> getClusterAddRemoveEvents(Set<NotificationEvent> events) {
      Set<ClusterStateChangeEvent> results = new LinkedHashSet<ClusterStateChangeEvent>();   /* Preserve order */
      for (NotificationEvent event : events) {
         if ((event instanceof NewVmEvent) || (event instanceof VmRemovedFromClusterEvent)) {
            results.add((ClusterStateChangeEvent)event);
         }
      }
      return results;
   }

   private Set<ClusterStateChangeEvent> getClusterUpdateEvents(Set<NotificationEvent> events) {
      Set<ClusterStateChangeEvent> results = new LinkedHashSet<ClusterStateChangeEvent>();   /* Preserve order */
      for (NotificationEvent event : events) {
         if ((event instanceof VmUpdateEvent) || (event instanceof ClusterUpdateEvent)) {
            results.add((ClusterStateChangeEvent)event);
         }
      }
      return results;
   }

   private Set<ClusterScaleCompletionEvent> getClusterScaleCompletionEvents(Set<NotificationEvent> events) {
      Set<ClusterScaleCompletionEvent> results = new LinkedHashSet<ClusterScaleCompletionEvent>();   /* Preserve order */
      for (NotificationEvent event : events) {
         if (event instanceof ClusterScaleCompletionEvent) {
            results.add((ClusterScaleCompletionEvent)event);
         }
      }
      return results;
   }

   private Set<ClusterHealthEvent> getClusterHealthEvents(Set<NotificationEvent> events) {
      Set<ClusterHealthEvent> results = new LinkedHashSet<ClusterHealthEvent>();   /* Preserve order */
      for (NotificationEvent event : events) {
         if (event instanceof ClusterHealthEvent) {
            results.add((ClusterHealthEvent)event);
         }
      }
      return results;
   }

   private void doRemove(Set<ClusterScaleEvent> scaleEventsForCluster, Set<ClusterScaleEvent> toRemove, String method) {
      if (toRemove != null) {
         int beforeSize = scaleEventsForCluster.size();
         scaleEventsForCluster.removeAll(toRemove);
         int afterSize = scaleEventsForCluster.size();
         _log.info("Consolidating scale events from "+beforeSize+" to "+afterSize+" for method "+method);
      }
   }

   /* Takes the list of event types a scale strategy can handle and removes any
    * events that are not of those types, either directly or through inheritance */
   private void removeEventsThisClusterCantHandle(Class<? extends ClusterScaleEvent>[] typesHandled, Set<ClusterScaleEvent> scaleEventsForCluster) {
      Set<ClusterScaleEvent> toRemove = null;
      for (ClusterScaleEvent event : scaleEventsForCluster) {
         boolean isAssignableFromAtLeastOne = false;
         for (Class<? extends ClusterScaleEvent> typeHandled : typesHandled) {
            if (typeHandled.isAssignableFrom(event.getClass())) {
               isAssignableFromAtLeastOne = true;
               break;
            }
         }
         if (!isAssignableFromAtLeastOne) {
            _log.finer("Scale strategy cannot handle event "+event);
            if (toRemove == null) {
               toRemove = new HashSet<ClusterScaleEvent>();
            }
            toRemove.add(event);
         }
      }
      doRemove(scaleEventsForCluster, toRemove, "removeEventsThisClusterCantHandle");
   }

   /* If events are marked isExclusive() == true and there are duplicate
    * events of that type, only the most recent should be kept */
   private void consolidateExclusiveEvents(Set<ClusterScaleEvent> scaleEventsForCluster) {
      Set<ClusterScaleEvent> toRemove = new HashSet<ClusterScaleEvent>();
      Map<Class<? extends ClusterScaleEvent>, ClusterScaleEvent> newestEventMap =
            new HashMap<Class<? extends ClusterScaleEvent>, ClusterScaleEvent>();
      for (ClusterScaleEvent event : scaleEventsForCluster) {
         if (event.isExclusive()) {
            ClusterScaleEvent toCompare = newestEventMap.get(event.getClass());
            if (toCompare == null) {
               newestEventMap.put(event.getClass(), event);
            } else {
               if (toCompare.getTimestamp() > event.getTimestamp()) {
                  toRemove.add(event);
               } else {
                  newestEventMap.put(event.getClass(), event);
                  toRemove.add(toCompare);
               }
            }
         }
      }
      if (toRemove.size() > 0) {
         doRemove(scaleEventsForCluster, toRemove, "consolidateExclusiveEvents");
      }
   }
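   /* Worked example for consolidateExclusiveEvents (timestamps are hypothetical): given
    * three exclusive events of the same class arriving with timestamps 1, 5 and 3, the
    * first becomes the provisional newest, the second (5 > 1) replaces it, and the third
    * is discarded (5 > 3), so only the event with timestamp 5 reaches the scale strategy.
    */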
   /* For now, simply remove any events that the scale strategy is not designed to handle */
   private Set<ClusterScaleEvent> consolidateClusterEvents(ScaleStrategy scaleStrategy, Set<ClusterScaleEvent> scaleEventsForCluster) {
      removeEventsThisClusterCantHandle(scaleStrategy.getScaleEventTypesHandled(), scaleEventsForCluster);
      consolidateExclusiveEvents(scaleEventsForCluster);
      return scaleEventsForCluster;
   }

   private SerengetiLimitInstruction pendingBlockingSwitchToManual(Set<ClusterScaleEvent> consolidatedEvents) {
      for (ClusterScaleEvent clusterScaleEvent : consolidatedEvents) {
         if (clusterScaleEvent instanceof SerengetiLimitInstruction) {
            SerengetiLimitInstruction returnVal = (SerengetiLimitInstruction)clusterScaleEvent;
            if (returnVal.getAction().equals(SerengetiLimitAction.actionWaitForManual)) {
               return returnVal;
            }
         }
      }
      return null;
   }

   /* Process new cluster state change events received from the ClusterStateChangeListener.
    * The impliedScaleEventsMap allows any clusterScaleEvents implied by cluster state changes to be added */
   private void handleClusterStateChangeEvents(Set<ClusterStateChangeEvent> eventsToProcess, Map<String, Set<ClusterScaleEvent>> impliedScaleEventsMap) {
      Set<ClusterScaleEvent> impliedScaleEventsForCluster = new LinkedHashSet<ClusterScaleEvent>();   /* Preserve order */
      for (ClusterStateChangeEvent event : eventsToProcess) {
         _log.info("ClusterStateChangeEvent received: "+event.toString(_log));
         /* ClusterMap will process the event and may add an implied scale event (see ExtraInfoToClusterMapper) */
         String clusterId = _clusterMap.handleClusterEvent(event, impliedScaleEventsForCluster);
         if (clusterId != null) {
            /* If there are new scale events, create or update the Set in the impliedScaleEventsMap */
            if (impliedScaleEventsForCluster.size() > 0) {
               if (impliedScaleEventsMap.get(clusterId) == null) {
                  impliedScaleEventsMap.put(clusterId, impliedScaleEventsForCluster);
                  impliedScaleEventsForCluster = new LinkedHashSet<ClusterScaleEvent>();
               } else {
                  impliedScaleEventsMap.get(clusterId).addAll(impliedScaleEventsForCluster);
                  impliedScaleEventsForCluster.clear();
               }
            }
         }
      }
   }
   /* When events are polled, this is the first method that gets the opportunity to triage them */
   private void handleEvents(Set<NotificationEvent> events) {
      final Set<NotificationEvent> newRequeuedAndInjectedEvents = new LinkedHashSet<NotificationEvent>(events);

      /* addRemoveEvents are events that affect the shape of a cluster */
      final Set<ClusterStateChangeEvent> addRemoveEvents = getClusterAddRemoveEvents(events);
      /* updateEvents are events that change the state of a cluster */
      final Set<ClusterStateChangeEvent> updateEvents = getClusterUpdateEvents(events);
      /* completionEvents are received when a cluster scale thread has finished executing */
      final Set<ClusterScaleCompletionEvent> completionEvents = getClusterScaleCompletionEvents(events);
      /* clusterScaleEvents are events suggesting the scaling up or down of a cluster
       * (we really want to ensure that no null clusterIds get added to clusterScaleEvents in the event of an error) */
      final Map<String, Set<ClusterScaleEvent>> clusterScaleEvents = new HashMap<String, Set<ClusterScaleEvent>>();

      /* ClusterMap is updated by VHM based on events that come in from the ClusterStateChangeListener.
       * The first thing we do here is update ClusterMap to ensure that the latest state is reflected ASAP.
       * Note that clusterScaleEvents can be implied by cluster state changes, so new clusterScaleEvents can be added here */
      if ((addRemoveEvents.size() + updateEvents.size() + completionEvents.size()) > 0) {
         try {
            _clusterMapAccess.runCodeInWriteLock(new Callable<Object>() {
               @Override
               public Object call() throws Exception {
                  /* Add/remove events are handled first as these will have the most significant impact */
                  handleClusterStateChangeEvents(addRemoveEvents, clusterScaleEvents);
                  /* Note that the scale strategy key may be updated here, so any subsequent call to getScaleStrategyForCluster will reflect the change */
                  handleClusterStateChangeEvents(updateEvents, clusterScaleEvents);
                  for (ClusterScaleCompletionEvent event : completionEvents) {
                     _log.info("ClusterScaleCompletionEvent received: "+event.getClass().getName());
                     if (event instanceof ClusterScaleDecision) {
                        /* The option exists for a scaleStrategy to ask for an event to be re-queued, causing it to be re-invoked */
                        List<NotificationEvent> eventsToRequeue = ((ClusterScaleDecision)event).getEventsToRequeue();
                        if (eventsToRequeue != null) {
                           _log.info("Requeuing event(s) from ClusterScaleCompletionEvent: "+eventsToRequeue);
                           newRequeuedAndInjectedEvents.addAll(eventsToRequeue);
                        }
                     }
                     _clusterMap.handleCompletionEvent(event);
                  }
                  return null;
               }
            });
         } catch (Exception e) {
            _log.severe("Exception updating ClusterMap: "+e);
         }
      }

      /* Event injection happens immediately after ClusterMap has been updated and allows collaborators to inject derived events to be processed */
      Set<NotificationEvent> allInjectedEvents = new LinkedHashSet<NotificationEvent>();
      for (NotificationEvent event : newRequeuedAndInjectedEvents) {
         for (EventInjector eventInjector : _eventInjectors) {
            Set<? extends NotificationEvent> newEvents = eventInjector.processEvent(event);
            if (newEvents != null) {
               for (NotificationEvent injectedEvent : newEvents) {
                  _log.info(eventInjector.getName()+" injecting new event "+injectedEvent+" in response to event "+event);
               }
               allInjectedEvents.addAll(newEvents);
            }
         }
      }
      newRequeuedAndInjectedEvents.addAll(allInjectedEvents);

      /* events that indicate a change in health status for a cluster */
      Set<ClusterHealthEvent> healthEvents = getClusterHealthEvents(newRequeuedAndInjectedEvents);
      if ((_healthMonitor != null) && !healthEvents.isEmpty()) {
         _healthMonitor.handleHealthEvents(_vcActions, healthEvents);
      }

      getQueuedScaleEventsForCluster(newRequeuedAndInjectedEvents, clusterScaleEvents);

      /* If there are scale events to handle, we need to invoke the scale strategies for each cluster.
       * The ordering in which we process the clusters doesn't matter as they will be done concurrently */
      if (clusterScaleEvents.size() > 0) {
         for (String clusterId : clusterScaleEvents.keySet()) {
            if (clusterId == null) {
               /* This should not happen - defensive coding */
               continue;
            }
            Set<ClusterScaleEvent> unconsolidatedEvents = clusterScaleEvents.get(clusterId);
            if (unconsolidatedEvents == null) {
               continue;
            }
            /* If ClusterMap has not yet been fully updated with information about a cluster, defer this operation */
            Boolean clusterCompleteness = _clusterMap.validateClusterCompleteness(clusterId, CLUSTER_COMPLETENESS_GRACE_TIME_MILLIS);
            if (clusterCompleteness != null) {
               if (!clusterCompleteness) {
                  if (unconsolidatedEvents.size() > 0) {
                     _log.info("ClusterInfo not yet complete. Putting event collection back on queue for cluster <%C"+clusterId+"%C>");
                     requeueExistingEvents(new ArrayList<ClusterScaleEvent>(unconsolidatedEvents));
                  }
                  continue;
               }
            } else {
               _log.warning("<%C"+clusterId+"%C>: cluster has been incomplete for longer than the grace period of "
                     +CLUSTER_COMPLETENESS_GRACE_TIME_MILLIS+"ms. Dumping queued events for it");
               continue;
            }
            /* Note that any update to the scale strategy will already have been processed above in handleClusterStateChangeEvents */
            ScaleStrategy scaleStrategy = _clusterMap.getScaleStrategyForCluster(clusterId);
            if (scaleStrategy == null) {
               _log.severe("<%C"+clusterId+"%C>: there is no scaleStrategy set for cluster");
               continue;
            }
            _log.finer("Using "+scaleStrategy.getKey()+" scale strategy to filter events for cluster <%C"+clusterId+"%C>");
            /* unconsolidatedEvents is guaranteed to be non-null and consolidatedEvents should be a trimmed-down version of the same collection */
            Set<ClusterScaleEvent> consolidatedEvents = consolidateClusterEvents(scaleStrategy, unconsolidatedEvents);
            if (consolidatedEvents.size() > 0) {
               /* If there is an instruction from Serengeti to switch to manual, strip out that one event and dump the others */
               SerengetiLimitInstruction switchToManualEvent = pendingBlockingSwitchToManual(consolidatedEvents);
               if (switchToManualEvent != null) {
                  /* If Serengeti has made the necessary change to extraInfo AND any other scaling has completed, inform completion */
                  boolean extraInfoChanged = _clusterMap.getScaleStrategyKey(clusterId).equals(ManualScaleStrategy.MANUAL_SCALE_STRATEGY_KEY);
                  boolean scalingCompleted = !_executionStrategy.isClusterScaleInProgress(clusterId);
                  try {
                     if (extraInfoChanged && scalingCompleted) {
                        _log.info("Switch to manual scale strategy for cluster <%C"+clusterId+"%C> is now complete. Reporting back to Serengeti");
                        switchToManualEvent.reportCompletion();
                     } else {
                        /* Continue to block the Serengeti CLI by putting the event back on the queue */
                        switchToManualEvent.reportProgress(0, "waiting for current scaling operation to complete");
                        requeueExistingEvents(Arrays.asList(new ClusterScaleEvent[]{switchToManualEvent}));
                     }
                  } catch (CannotConnectException e) {
                     _log.warning("Failed to report progress back on rabbit queue: "+e.getCause().getMessage());
                  }
               /* Call out to the execution strategy to handle the scale events for the cluster - non-blocking */
               } else if (!_executionStrategy.handleClusterScaleEvents(clusterId, scaleStrategy, consolidatedEvents)) {
                  /* If we couldn't schedule handling of the events, put them back on the queue in their un-consolidated form */
                  _log.finest("Putting event collection back onto VHM queue - size="+unconsolidatedEvents.size());
                  requeueExistingEvents(new ArrayList<ClusterScaleEvent>(unconsolidatedEvents));
               }
            }
         }
      }
   }

   public Thread start() {
      _stopped = false;
      Thread t = new Thread(new Runnable() {
         @Override
         public void run() {
            try {
               _running = true;
               while (_running) {
                  Set<NotificationEvent> events = pollForEvents();
                  if (checkForProducerReset(events)) {
                     if (!_eventProducers.reset()) {
                        _log.severe("VHM: unable to reset event producers - Terminating VHM");
                        break;
                     }
                  }
                  handleEvents(events);
                  Thread.sleep(500);
               }
            } catch (Throwable e) {
               _log.log(Level.WARNING, "VHM: stopping due to exception - "+e.getMessage());
               _log.log(Level.INFO, "VHM: stopping due to exception in VHM main loop", e);
            }
            _log.info("VHM stopping...");
            _stopped = true;
         }
      }, "VHM_Main_Thread");
      t.start();
      return t;
   }

   public void stop(boolean hardStop) {
      _log.log(VhmLevel.USER, "VHM: stopping");
      _running = false;
      _eventProducers.stop();
      placeEventOnQueue(new AbstractNotificationEvent(hardStop, false) {});
   }

   public boolean isStopped() {
      return (_stopped && _eventProducers.isAllStopped());
   }

   VCActions getVCActions() {
      return _vcActions;
   }

   /**
    * Hack to specifically dump the cluster map instead of a more generic state of the world
    */
   public void dumpClusterMap(Level level) {
      _clusterMap.dumpState(level);
   }
}
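/* Typical wiring, as a hedged sketch - the producer and monitor instances below are
 * assumptions for illustration, not references to concrete classes in this file:
 *
 *    VHM vhm = new VHM(vcActions, scaleStrategies, strategyMapper, threadLocalStatus);
 *    vhm.registerEventProducer(clusterStateChangeListener);   // blocks until the producer starts
 *    vhm.registerHealthMonitor(healthMonitor);
 *    Thread mainThread = vhm.start();   // spawns and returns "VHM_Main_Thread"
 *    ...
 *    vhm.stop(true);   // wakes the main loop with a hard-stop event and stops producers
 */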