/************************************************************************* * Copyright 2009-2015 Eucalyptus Systems, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see http://www.gnu.org/licenses/. * * Please contact Eucalyptus Systems, Inc., 6755 Hollister Ave., Goleta * CA 93117, USA or visit http://www.eucalyptus.com/licenses/ if you need * additional information or have any questions. * * This file may incorporate work covered under the following copyright * and permission notice: * * Software License Agreement (BSD License) * * Copyright (c) 2008, Regents of the University of California * All rights reserved. * * Redistribution and use of this software in source and binary forms, * with or without modification, are permitted provided that the * following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. USERS OF THIS SOFTWARE ACKNOWLEDGE * THE POSSIBLE PRESENCE OF OTHER OPEN SOURCE LICENSED MATERIAL, * COPYRIGHTED MATERIAL OR PATENTED MATERIAL IN THIS SOFTWARE, * AND IF ANY SUCH MATERIAL IS DISCOVERED THE PARTY DISCOVERING * IT MAY INFORM DR. RICH WOLSKI AT THE UNIVERSITY OF CALIFORNIA, * SANTA BARBARA WHO WILL THEN ASCERTAIN THE MOST APPROPRIATE REMEDY, * WHICH IN THE REGENTS' DISCRETION MAY INCLUDE, WITHOUT LIMITATION, * REPLACEMENT OF THE CODE SO IDENTIFIED, LICENSING OF THE CODE SO * IDENTIFIED, OR WITHDRAWAL OF THE CODE CAPABILITY TO THE EXTENT * NEEDED TO COMPLY WITH ANY SUCH LICENSES OR RIGHTS. ************************************************************************/ package com.eucalyptus.component; import java.util.Arrays; import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.LinkedTransferQueue; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import javax.persistence.Column; import javax.persistence.Entity; import javax.persistence.EnumType; import javax.persistence.Enumerated; import javax.persistence.GeneratedValue; import javax.persistence.Id; import javax.persistence.Lob; import javax.persistence.PersistenceContext; import javax.persistence.PrePersist; import javax.persistence.PreUpdate; import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; import javax.persistence.Transient; import javax.persistence.Version; import com.google.common.base.Joiner; import com.google.common.base.Objects; import com.google.common.collect.Collections2; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimaps; import com.google.common.collect.SetMultimap; import org.apache.log4j.Logger; import org.hibernate.annotations.GenericGenerator; import org.hibernate.annotations.Type; import com.eucalyptus.bootstrap.Bootstrap; import com.eucalyptus.bootstrap.BootstrapArgs; import com.eucalyptus.bootstrap.Bootstrapper; import com.eucalyptus.bootstrap.Hosts; import com.eucalyptus.component.Component.State; import com.eucalyptus.component.Component.Transition; import com.eucalyptus.component.fault.FaultBuilderImpl; import com.eucalyptus.component.fault.FaultSubsystemManager; import com.eucalyptus.component.id.Eucalyptus; import com.eucalyptus.configurable.ConfigurableClass; import com.eucalyptus.configurable.ConfigurableField; import com.eucalyptus.empyrean.ServiceStatusDetail; import com.eucalyptus.empyrean.ServiceStatusType; import com.eucalyptus.event.EventListener; import com.eucalyptus.event.Hertz; import com.eucalyptus.event.Listeners; import com.eucalyptus.records.Logs; import com.eucalyptus.scripting.Groovyness; import com.eucalyptus.system.SubDirectory; import com.eucalyptus.system.Threads; import com.eucalyptus.util.Emails; import com.eucalyptus.util.Exceptions; import com.eucalyptus.util.TypeMapper; import com.eucalyptus.util.TypeMappers; import com.eucalyptus.util.fsm.TransitionRecord; import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @ConfigurableClass( root = "bootstrap.notifications", description = "Parameters controlling the handling of service state notifications." ) public class Faults { private static Logger LOG = Logger.getLogger( Faults.class ); private static final String DEFAULT_EMAIL_SUBJECT_PREFIX = "[eucalyptus-notifications] "; private static final String DEFAULT_EMAIL_FROM = "notification@eucalyptus"; private static final String DEFAULT_EMAIL_FROM_NAME = "Eucalyptus Notifications"; @ConfigurableField( description = "Email address where notifications are to be delivered." ) public static String EMAIL_TO; @ConfigurableField( description = "From email address used for notification delivery.", initial = DEFAULT_EMAIL_FROM ) public static String EMAIL_FROM = DEFAULT_EMAIL_FROM; @ConfigurableField( description = "From email name used for notification delivery.", initial = DEFAULT_EMAIL_FROM_NAME ) public static String EMAIL_FROM_NAME = DEFAULT_EMAIL_FROM_NAME; @ConfigurableField( description = "Email subject used for notification delivery.", initial = DEFAULT_EMAIL_SUBJECT_PREFIX ) public static String EMAIL_SUBJECT_PREFIX = DEFAULT_EMAIL_SUBJECT_PREFIX; @ConfigurableField( description = "Interval (in seconds) during which a notification will be delayed to allow for batching events for delivery.", initial = "60" ) public static Integer BATCH_DELAY_SECONDS = 60; @ConfigurableField( description = "Send a system state digest periodically.", initial = "false" ) public static Boolean DIGEST = Boolean.FALSE; @ConfigurableField( description = "If sending system state digests is set to true, then only send the digest when the system has failures to report.", initial = "true" ) public static Boolean DIGEST_ONLY_ON_ERRORS = Boolean.TRUE; @ConfigurableField( description = "Period (in hours) with which a system state digest will be delivered.", initial = "24" ) public static Integer DIGEST_FREQUENCY_HOURS = 24; @ConfigurableField( description = "Period (in hours) with which a system state digest will be delivered.", initial = "false" ) public static Boolean INCLUDE_FAULT_STACK = Boolean.FALSE; enum NoopErrorFilter implements Predicate<Throwable> { INSTANCE; @Override public boolean apply( final Throwable input ) { Logs.exhaust( ).error( input, input ); return true; } } @Entity @PersistenceContext( name = "eucalyptus_faults" ) @Table( name = "faults_records" ) public static class CheckException extends RuntimeException implements Iterable<CheckException> { @Id @GeneratedValue( generator = "system-uuid" ) @GenericGenerator( name = "system-uuid", strategy = "uuid" ) @Column( name = "id" ) String id; @Version @Column( name = "version" ) Integer version; @Temporal( TemporalType.TIMESTAMP ) @Column( name = "creation_timestamp" ) Date creationTimestamp; @Temporal( TemporalType.TIMESTAMP ) @Column( name = "last_update_timestamp" ) Date lastUpdateTimestamp; @Column( name = "metadata_perm_uuid", unique = true, updatable = false, nullable = false ) private String naturalId; @Transient private static final long serialVersionUID = 1L; @Enumerated( EnumType.STRING ) private final Severity severity; @Column( name = "fault_service_name" ) private final String serviceName; @Column( name = "fault_service_full_name" ) private final String serviceFullName; @Column( name = "fault_timestamp" ) private final Date timestamp; @Column( name = "fault_msg_correlation_id" ) private final String correlationId; @Column( name = "fault_event_epoch" ) private final Integer eventEpoch; @Column( name = "fault_service_state" ) private final Component.State eventState; @Column( name = "fault_stack_trace" ) @Lob @Type(type="org.hibernate.type.StringClobType") private String stackString; @Transient private CheckException other; @SuppressWarnings( "unused" ) public CheckException( ) { this( null ); } private CheckException( final String serviceName ) { this.serviceName = serviceName; this.serviceFullName = null; this.severity = null; this.timestamp = null; this.correlationId = null; this.eventEpoch = null; this.eventState = null; this.stackString = null; } CheckException( final ServiceConfiguration config, final Severity severity, final Throwable cause ) { this( config, severity, cause, null ); } // CheckException( final String correlationId, final Throwable cause, final Severity severity, final ServiceConfiguration config ) { // this( config, severity, cause, correlationId ); // } // CheckException( final ServiceConfiguration config, final Severity severity, final Throwable cause, final String correlationId ) { super( cause != null ? cause.getMessage( ) : Exceptions.causeString( cause ) ); if ( cause instanceof CheckException ) { this.initCause( cause ); this.setStackTrace( cause.getStackTrace( ) ); } else if ( cause != null ) { this.initCause( cause ); this.fillInStackTrace( ); } this.severity = severity; this.serviceName = config.getName( ); this.serviceFullName = config.getFullName( ).toString( ); this.correlationId = ( correlationId == null ? UUID.randomUUID( ).toString( ) : correlationId ); this.timestamp = new Date( ); this.eventState = config.lookupState( ); this.eventEpoch = Topology.epoch( ); this.stackString = Exceptions.string( this ); } @PreUpdate @PrePersist public void updateTimeStamps( ) { this.lastUpdateTimestamp = new Date( ); if ( this.creationTimestamp == null ) { this.creationTimestamp = new Date( ); } if ( this.naturalId == null ) { this.naturalId = UUID.randomUUID( ).toString( ); } } public Severity getSeverity( ) { return this.severity; } @Override public Iterator<CheckException> iterator( ) { return new Iterator<CheckException>( ) { CheckException next; { this.next = CheckException.this; } @Override public boolean hasNext( ) { return this.next != null; } @Override public CheckException next( ) { CheckException ret = this.next; this.next = ( ret != null ? ret.other : null ); return ret; } @Override public void remove( ) { LOG.error( "ServiceCheckException iterator does not support remove()" ); } }; } public Date getTimestamp( ) { return this.timestamp; } public String getCorrelationId( ) { return this.correlationId; } public String getServiceName( ) { return this.serviceName; } public int getEventEpoch( ) { return this.eventEpoch; } public Component.State getEventState( ) { return this.eventState; } public String getId( ) { return this.id; } public void setId( final String id ) { this.id = id; } public Integer getVersion( ) { return this.version; } public void setVersion( final Integer version ) { this.version = version; } public Date getCreationTimestamp( ) { return this.creationTimestamp; } public void setCreationTimestamp( final Date creationTimestamp ) { this.creationTimestamp = creationTimestamp; } public Date getLastUpdateTimestamp( ) { return this.lastUpdateTimestamp; } public void setLastUpdateTimestamp( final Date lastUpdateTimestamp ) { this.lastUpdateTimestamp = lastUpdateTimestamp; } public String getNaturalId( ) { return this.naturalId; } public void setNaturalId( final String naturalId ) { this.naturalId = naturalId; } private CheckException getOther( ) { return this.other; } private void setOther( final CheckException other ) { this.other = other; } public String getStackString( ) { return this.stackString; } public String getServiceFullName( ) { return this.serviceFullName; } } /** * @param parent * @param ex * @param failureAction * @return true if the error is fatal and the transition should be aborted */ public static final boolean filter( final ServiceConfiguration parent, final Throwable ex, final Predicate<Throwable>... filters ) { Predicate<Throwable> failureAction; if ( ( filters != null ) && ( filters.length > 0 ) ) { failureAction = filters[0]; } else { failureAction = NoopErrorFilter.INSTANCE; } if ( ex instanceof CheckException ) {//go through all the exceptions and look for things with Severity greater than or equal to ERROR for ( final CheckException checkEx : ( CheckException ) ex ) { if ( checkEx.getSeverity( ).ordinal( ) >= Severity.ERROR.ordinal( ) ) { try { failureAction.apply( ex ); } catch ( final Exception ex1 ) { Logs.extreme( ).error( ex1, ex1 ); } return true; } } return false; } else {//treat generic exceptions as always being Severity.ERROR try { failureAction.apply( ex ); } catch ( final Exception ex1 ) { Logs.extreme( ).error( ex1, ex1 ); } return true; } } public static final boolean filter( final ServiceConfiguration parent, final Throwable ex ) { return filter( parent, ex, NoopErrorFilter.INSTANCE ); } /** * The possible actions are: * - store: for later review, e.g., log analyzer * - log: write to log files at the primary CLC * - describe: make available in euca-describe-* * - ui: notification is presented in the ui at next login (note: this is different than filtering * check-exception history) * - notify: basic notifcation is delivered (i.e., email) * - alert: recurrent/urgent notification is delivered until disabled * * TODO:GRZE: this behaviour should be @Configurable */ public enum Actions { STORE, LOGGING, DESCRIBE, UI, NOTIFY, ALERT } /** * Severity levels which can be used to express the system's reaction to exceptions thrown by * either {@link Bootstrapper#check()} or {@link ServiceBuilder#fireCheck(ServiceConfiguration)}. * The default severity used for unchecked exceptions is {@link Severity#ERROR}. Environmentally * triggered changes to system topology are reported as {@link Severity#URGENT}. * * Severity of the exception determines: * 1. The way the system responds in terms of changing service state/system topology * 2. The length of time for which the record is stored * 3. The means used to deliver notifications to the admin * * TODO:GRZE: this behaviour should be @Configurable */ public enum Severity implements Predicate<CheckException> { TRACE, //ignored DEBUG, //default: store INFO, //default: store, describe WARNING, //default: store, describe, ui, notification ERROR, //default: store, describe, ui, notification URGENT, //default: store, describe, ui, notification, alert FATAL; @Override public boolean apply( CheckException input ) { if ( input == null ) { return false; } else { for ( CheckException ex : input ) { if ( this.equals( ex.getSeverity( ) ) ) { return true; } } return false; } } } public enum Scope { SERVICE, HOST, NETWORK; } private static CheckException chain( final ServiceConfiguration config, final Severity severity, final List<? extends Throwable> exs ) { if ( exs == null || exs.isEmpty( ) ) { return new CheckException( config, Severity.TRACE, new NullPointerException( "Faults.chain called w/ empty list: " + exs ) ); } else { try { CheckException last = null; for ( final Throwable ex : Lists.reverse( exs ) ) { if ( ( last != null ) && ( ex instanceof CheckException ) ) { last.other = ( CheckException ) ex; } else if ( ( last != null ) && !( ex instanceof CheckException ) ) { last.other = new CheckException( config, severity, ex ); } else if ( last == null && ( ex instanceof CheckException ) ) { last = ( CheckException ) ex; } else { last = new CheckException( config, severity, ex ); } } last = ( last != null ? last : new CheckException( config, Severity.TRACE, new NullPointerException( "Faults.chain called w/ empty list: " + exs ) ) ); return last; } catch ( Exception ex ) { LOG.error( "Faults: error in processing previous error: " + ex ); Logs.extreme( ).error( ex, ex ); return new CheckException( config, Severity.ERROR, ex ); } } } public static CheckException failure( final ServiceConfiguration config, final String... messages ) { return failure( config, new RuntimeException( Joiner.on( "\n" ).join( Arrays.asList( messages ) ) ) ); } public static CheckException failure( final ServiceConfiguration config, final Throwable... exs ) { return failure( config, Arrays.asList( exs ) ); } public static CheckException failure( final ServiceConfiguration config, final List<? extends Throwable> exs ) { return chain( config, Severity.ERROR, ( List<Throwable> ) exs ); } public static CheckException advisory( final ServiceConfiguration config, final List<? extends Throwable> exs ) { return chain( config, Severity.INFO, ( List<Throwable> ) exs ); } public static CheckException advisory( final ServiceConfiguration config, final String... messages ) { return advisory( config, new RuntimeException( Joiner.on( "\n" ).join( Arrays.asList( messages ) ) ) ); } public static CheckException advisory( final ServiceConfiguration config, final Throwable... exs ) { return advisory( config, Arrays.asList( exs ) ); } public static CheckException fatal( final ServiceConfiguration config, final List<? extends Throwable> exs ) { return chain( config, Severity.FATAL, ( List<Throwable> ) exs ); } @TypeMapper public enum StatusDetailExceptionRecordMapper implements Function<ServiceStatusDetail, CheckException> { INSTANCE; @Override public CheckException apply( final ServiceStatusDetail input ) { ServiceConfiguration config = null; final String serviceFullName = Strings.nullToEmpty( input.getServiceFullName() ); try { final String serviceName = Strings.nullToEmpty( input.getServiceName() ); config = ServiceConfigurations.lookupByName( serviceName ); } catch ( RuntimeException e ) { for ( Component c : Components.list( ) ) { for ( ServiceConfiguration s : c.services() ) { if ( serviceFullName.equals( s.getFullName().toString() ) ) { config = s; break; } } } if(config==null){ throw e; } } Severity severity = Severity.DEBUG; if ( input.getSeverity( ) != null ) { severity = Severity.valueOf( input.getSeverity( ) ); } CheckException ex = new CheckException( config, severity, new Exception( input.toString( ) ), input.getUuid( ) ); ex.stackString = input.getStackTrace( ); return ex; } } enum StatusToCheckException implements Function<ServiceStatusType, CheckException> { INSTANCE; @Override public CheckException apply( final ServiceStatusType input ) { final List<CheckException> exs = Lists.newArrayList( ); final ServiceConfiguration config = TypeMappers.transform( input.getServiceId( ), ServiceConfiguration.class ); final Component.State serviceState = Component.State.valueOf( input.getLocalState( ) ); final Component.State localState = config.lookupState( ); if ( Component.State.ENABLED.equals( localState ) && !localState.equals( serviceState ) ) { exs.add( failure( config, new IllegalStateException( "State mismatch: local state is " + localState + " and remote state is: " + serviceState ) ) ); } for ( final ServiceStatusDetail detail : input.getStatusDetails( ) ) { final CheckException ex = TypeMappers.transform( detail, CheckException.class ); exs.add( ex ); } if ( exs.isEmpty( ) ) { return new CheckException( config, Severity.DEBUG, new Exception( input.toString( ) ) ); } else { return Faults.chain( config, Severity.ERROR, exs ); } } } public static Function<ServiceStatusType, CheckException> transformToExceptions( ) { return StatusToCheckException.INSTANCE; } private static final SetMultimap<ServiceConfiguration, FaultRecord> serviceExceptions = Multimaps.synchronizedSetMultimap( HashMultimap.<ServiceConfiguration, FaultRecord>create() ); private static final BlockingQueue<FaultRecord> errorQueue = new LinkedTransferQueue<FaultRecord>( ); private static class FaultRecord { private final ServiceConfiguration serviceConfiguration; private final TransitionRecord<ServiceConfiguration, State, Transition> transitionRecord; private final CheckException error; private final Component.State finalState; private FaultRecord( ServiceConfiguration serviceConfiguration, TransitionRecord<ServiceConfiguration, State, Transition> transitionRecord, CheckException error ) { super( ); this.serviceConfiguration = serviceConfiguration; this.finalState = serviceConfiguration.lookupState( ); this.transitionRecord = transitionRecord; this.error = error; } public ServiceConfiguration getServiceConfiguration( ) { return this.serviceConfiguration; } public TransitionRecord<ServiceConfiguration, State, Transition> getTransitionRecord( ) { return this.transitionRecord; } public CheckException getError( ) { return this.error; } private Component.State getFinalState( ) { return this.finalState; } @Override public int hashCode() { return Objects.hashCode( this.serviceConfiguration, this.error.getMessage(), this.finalState ); } @Override public boolean equals( Object obj ) { if ( this == obj ) { return true; } if ( obj == null || getClass() != obj.getClass() ) { return false; } final FaultRecord that = ( FaultRecord ) obj; return Objects.equal( this.serviceConfiguration.getFullName().toString(), that.serviceConfiguration.getFullName().toString() ) && Objects.equal( this.error.getMessage(), that.error.getMessage() ) && Objects.equal( this.finalState, that.finalState ); } } public static void flush( final ServiceConfiguration config ) { serviceExceptions.removeAll( config ); } public static Collection<CheckException> lookup( final ServiceConfiguration config ) { Collection<CheckException> records = Collections2.transform( serviceExceptions.get( config ), new Function<FaultRecord, CheckException>() { @Override public CheckException apply( FaultRecord input ) { return input.getError(); } } ); if ( records != null && !records.isEmpty() ) { return Lists.newArrayList( records ); } else { return Lists.newArrayList( ); } } public static void submit( final ServiceConfiguration parent, TransitionRecord<ServiceConfiguration, State, Transition> transitionRecord, final CheckException errors ) { FaultRecord record = new FaultRecord( parent, transitionRecord, errors ); serviceExceptions.put( parent, record ); if ( errors != null && BootstrapArgs.isCloudController( ) && Bootstrap.isFinished( ) ) { errorQueue.offer( record ); } } public static class FaultNotificationHandler implements EventListener<Hertz>, Callable<Boolean> { private static final AtomicBoolean ready = new AtomicBoolean( true ); private static final AtomicLong lastDigest = new AtomicLong( System.currentTimeMillis( ) ); public static void register( ) { Listeners.register( Hertz.class, new FaultNotificationHandler( ) ); } @Override public void fireEvent( final Hertz event ) { if ( Bootstrap.isOperational( ) && event.isAsserted( Faults.BATCH_DELAY_SECONDS ) && ready.compareAndSet( true, false ) ) { try { Threads.enqueue( Eucalyptus.class, Faults.class, this ); } catch ( final Exception ex ) { ready.set( true ); } } } @Override public Boolean call( ) throws Exception { try { sendFaults( ); sendDigest( ); } finally { ready.set( true ); } return true; } private static void sendDigest( ) { if ( Hosts.isCoordinator( ) && Faults.DIGEST ) { long lastTime = lastDigest.getAndSet( System.currentTimeMillis( ) ); if ( ( lastDigest.get( ) - lastTime ) > Faults.DIGEST_FREQUENCY_HOURS * 60 * 60 * 1000 ) { Date digestDate = new Date( lastDigest.get( ) ); if ( !serviceExceptions.isEmpty( ) || !Faults.DIGEST_ONLY_ON_ERRORS ) { LOG.debug( "Fault notifications: preparing digest for " + digestDate + "." ); try { String subject = Faults.EMAIL_SUBJECT_PREFIX + " system state for " + digestDate; String result = Groovyness.run( SubDirectory.SCRIPTS, "notifications_digest" ); if ( !Strings.isNullOrEmpty( result ) ) { dispatchEmail( subject, result ); } } catch ( Exception ex ) { LOG.error( "Fault notifications: rendering digest failed: " + ex.getMessage( ) ); Logs.extreme( ).error( ex, ex ); } } else { LOG.debug( "Fault notifications: skipping digest for " + digestDate + "." ); } } else { lastDigest.set( lastTime ); } } } private static void sendFaults( ) { LOG.debug( "Fault notifications: waking up to service error queue." ); final List<FaultRecord> pendingFaults = Lists.newArrayList( ); errorQueue.drainTo( pendingFaults ); if ( pendingFaults.isEmpty( ) ) { LOG.debug( "Fault notifications: service error queue is empty... going back to sleep." ); } else { if ( Hosts.isCoordinator( ) ) { String subject = Faults.EMAIL_SUBJECT_PREFIX; List<FaultRecord> noStateChange = Lists.newArrayList( ); List<FaultRecord> stateChange = Lists.newArrayList( ); for ( FaultRecord f : pendingFaults ) { TransitionRecord<ServiceConfiguration, State, Transition> tr = f.getTransitionRecord( ); if ( tr.getRule( ).getFromState( ).equals( f.getFinalState( ) ) ) { noStateChange.add( f ); } else { stateChange.add( f ); subject += " " + f.getServiceConfiguration( ).getName( ) + "->" + f.getFinalState( ); } } if ( stateChange.isEmpty( ) ) { LOG.debug( "Fault notifications: no state changes pending, discarding pending faults" ); } else { try { String result = Groovyness.run( SubDirectory.SCRIPTS, "notifications", new HashMap( ) { { this.put( "faults", pendingFaults ); } } ); if ( !Strings.isNullOrEmpty( result ) ) { dispatchEmail( subject, result ); } } catch ( Exception ex ) { LOG.error( "Fault notifications: rendering notification failed: " + ex.getMessage( ) ); Logs.extreme( ).error( ex, ex ); } } } } } public static void dispatchEmail( String subject, String result ) { LOG.debug( "From: " + Faults.EMAIL_FROM_NAME + " <" + Faults.EMAIL_FROM + ">" ); LOG.debug( "To: " + Faults.EMAIL_TO ); LOG.debug( "Subject: " + subject ); LOG.debug( result ); if ( !Strings.isNullOrEmpty( Faults.EMAIL_TO ) ) { Emails.send( Faults.EMAIL_FROM, Faults.EMAIL_FROM_NAME, Faults.EMAIL_TO, subject, result ); } } } private static final ConcurrentMap<ServiceConfiguration, CheckException> failstopExceptions = Maps.newConcurrentMap( ); public static void flush( ) { failstopExceptions.clear( ); } public static void failstop( ServiceConfiguration key, CheckException checkEx ) { for ( CheckException ex : checkEx ) { if ( Severity.FATAL.equals( ex.getSeverity( ) ) ) { LOG.warn( "FAILSTOP: " + key.getFullName( ) + "=> " + checkEx.getMessage( ) ); failstopExceptions.put( key, checkEx ); return; } } } public static boolean isFailstop( ) { return !failstopExceptions.isEmpty( ); } private static final FaultSubsystemManager faultSubsystemManager = new FaultSubsystemManager(); public static void init() { faultSubsystemManager.init(); } public static FaultBuilder forComponent(Class <? extends ComponentId> componentIdClass) { return new FaultBuilderImpl(faultSubsystemManager, componentIdClass); } public interface FaultBuilder { /** * The fault identifier (Required) * * @param faultId The fault identifier * @return This builder for call chaining */ FaultBuilder havingId(int faultId); /** * Add a variable for the fault log. * * @param name The variable name * @param value The variable value * @return This builder for call chaining */ FaultBuilder withVar(String name, String value); /** * Get a Runnable that will log the fault on the first invocation. * * <p>To log a fault only once retain a reference to the returned Runnable * and call whenever the fault occurs. Invocations after the first will be * ignored.</p> * * @note GRZE: with displeasure I force the return of the fault string for use elsewhere in the name of consistency and dryness. * @return A Runnable to be called to log the fault. */ Callable<String> logOnFirstRun(); /** * Log a fault message with the provided details. * @note GRZE: with displeasure I force the return of the fault string for use elsewhere in the name of consistency and dryness. */ String log(); } }