/*************************************************************************
* Copyright 2009-2012 Eucalyptus Systems, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
* Please contact Eucalyptus Systems, Inc., 6755 Hollister Ave., Goleta
* CA 93117, USA or visit http://www.eucalyptus.com/licenses/ if you need
* additional information or have any questions.
*
* This file may incorporate work covered under the following copyright
* and permission notice:
*
* Software License Agreement (BSD License)
*
* Copyright (c) 2008, Regents of the University of California
* All rights reserved.
*
* Redistribution and use of this software in source and binary forms,
* with or without modification, are permitted provided that the
* following conditions are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE. USERS OF THIS SOFTWARE ACKNOWLEDGE
* THE POSSIBLE PRESENCE OF OTHER OPEN SOURCE LICENSED MATERIAL,
* COPYRIGHTED MATERIAL OR PATENTED MATERIAL IN THIS SOFTWARE,
* AND IF ANY SUCH MATERIAL IS DISCOVERED THE PARTY DISCOVERING
* IT MAY INFORM DR. RICH WOLSKI AT THE UNIVERSITY OF CALIFORNIA,
* SANTA BARBARA WHO WILL THEN ASCERTAIN THE MOST APPROPRIATE REMEDY,
* WHICH IN THE REGENTS' DISCRETION MAY INCLUDE, WITHOUT LIMITATION,
* REPLACEMENT OF THE CODE SO IDENTIFIED, LICENSING OF THE CODE SO
* IDENTIFIED, OR WITHDRAWAL OF THE CODE CAPABILITY TO THE EXTENT
* NEEDED TO COMPLY WITH ANY SUCH LICENSES OR RIGHTS.
************************************************************************/
package com.eucalyptus.cluster.proxy.node;
import java.net.URI;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import com.eucalyptus.cluster.common.ClusterController;
import com.eucalyptus.cluster.common.internal.ClusterRegistry;
import com.eucalyptus.component.*;
import com.eucalyptus.empyrean.*;
import com.eucalyptus.system.Threads;
import com.eucalyptus.util.async.AsyncRequests;
import com.eucalyptus.util.fsm.TransitionRecord;
import com.google.common.base.*;
import com.google.common.base.Objects;
import com.google.common.collect.*;
import edu.ucsb.eucalyptus.msgs.BaseMessage;
import org.apache.log4j.Logger;
import com.eucalyptus.cluster.common.internal.Cluster;
import com.eucalyptus.util.Exceptions;
import com.eucalyptus.cluster.common.msgs.NodeInfo;
import com.eucalyptus.cluster.common.msgs.NodeType;
import static com.google.common.collect.Iterables.toArray;
public class Nodes {
// Class-wide logger, shared by the nested callable/function helpers below.
private static Logger LOG = Logger.getLogger( Nodes.class );
// Grace period (milliseconds) before a node that stops being reported by its
// cluster controller is destroyed and removed from the cluster's node map.
public static Long REFRESH_TIMEOUT = TimeUnit.MINUTES.toMillis( 10 );
/**
 * Build a function that resolves a node host name or service tag to the
 * {@link NodeInfo} registered for the given cluster controller.
 * The returned function throws {@link NoSuchElementException} (listing the
 * known node keys) when no matching node is registered.
 */
static Function<String, NodeInfo> lookupNodeInfo( final ServiceConfiguration ccConfig ) {
  return new Function<String, NodeInfo>( ) {
    @Override
    public NodeInfo apply( @Nullable String ncHostOrTag ) {
      final Map<String, NodeInfo> knownNodes = lookupAny( ccConfig ).getNodeHostMap( );
      if ( !knownNodes.containsKey( ncHostOrTag ) ) {
        throw new NoSuchElementException( "Failed to lookup node using "
                                          + ncHostOrTag
                                          + ". Available nodes are: "
                                          + Joiner.on( "\n" ).join( knownNodes.keySet( ) ) );
      }
      return knownNodes.get( ncHostOrTag );
    }
  };
}
/**
 * Build a function that maps a {@link NodeInfo} onto its node-controller
 * {@link ServiceConfiguration}. If the node is not yet registered with the
 * {@link ProxyNodeController} component, a new configuration is created from
 * the node's service-tag URI and registered via {@code Component#setup}.
 */
static Function<NodeInfo, ServiceConfiguration> transformNodeInfo( final ServiceConfiguration ccConfig ) {
  return new Function<NodeInfo, ServiceConfiguration>( ) {
    @Override
    public ServiceConfiguration apply( @Nullable NodeInfo nodeInfo ) {
      final Component nodeComponent = Components.lookup( ComponentIds.lookup( ProxyNodeController.class ) );
      try {
        // Fast path: node already registered with the component.
        return nodeComponent.lookup( nodeInfo.getName( ) );
      } catch ( final NoSuchElementException notRegistered ) {
        // Slow path: derive host/port from the node's service tag and register it.
        final URI serviceUri = URI.create( nodeInfo.getServiceTag( ) );
        final ServiceBuilder<? extends ServiceConfiguration> builder =
            ServiceBuilders.lookup( nodeComponent.getComponentId( ) );
        final ServiceConfiguration nodeConfig = builder.newInstance( ccConfig.getPartition( ),
                                                                     nodeInfo.getName( ),
                                                                     serviceUri.getHost( ),
                                                                     serviceUri.getPort( ) );
        nodeComponent.setup( nodeConfig );
        return nodeConfig;
      }
    }
  };
}
/**
 * Compose the host-or-tag lookup with the NodeInfo-to-configuration mapping,
 * yielding a single function from node host/tag to {@link ServiceConfiguration}.
 */
private static Function<String, ServiceConfiguration> lookupNodeServiceConfiguration( ServiceConfiguration ccConfig ) {
  final Function<String, NodeInfo> toNodeInfo = lookupNodeInfo( ccConfig );
  final Function<NodeInfo, ServiceConfiguration> toConfiguration = transformNodeInfo( ccConfig );
  return Functions.compose( toConfiguration, toNodeInfo );
}
/**
 * Reconcile the cluster's node map with the node list just reported by its
 * cluster controller: expire nodes unseen for longer than
 * {@link Nodes#REFRESH_TIMEOUT}, add newly reported nodes, refresh known ones,
 * and finally push the resulting set through service-configuration bookkeeping.
 *
 * @param ccConfig the owning cluster controller's configuration
 * @param nodes    node descriptions reported by that cluster controller
 */
public static void updateNodeInfo( ServiceConfiguration ccConfig, List<NodeType> nodes ) {
ConcurrentNavigableMap<String, NodeInfo> clusterNodeMap = lookupAny( ccConfig ).getNodeMap( );
/** prepare key sets for comparison **/
Set<String> knownTags = Sets.newHashSet( clusterNodeMap.keySet( ) );
Set<String> reportedTags = Sets.newHashSet( );
for ( final NodeType node : nodes ) {
reportedTags.add( node.getServiceTag( ) );
}
/** compute intersections and differences **/
Set<String> unreportedTags = Sets.difference( knownTags, reportedTags );
Set<String> newTags = Sets.difference( reportedTags, knownTags );
Set<String> stillKnownTags = Sets.intersection( knownTags, reportedTags );
StringBuilder nodeLog = new StringBuilder( );
/** maybe remove unreported nodes **/
// Only drop a node once it has been missing past the refresh timeout, so a
// single skipped report does not destroy its service registration.
for ( String unreportedTag : unreportedTags ) {
NodeInfo unreportedNode = clusterNodeMap.get( unreportedTag );
if ( unreportedNode != null && ( System.currentTimeMillis( ) - unreportedNode.getLastSeen( ).getTime( ) ) > Nodes.REFRESH_TIMEOUT ) {
Topology.destroy( Components.lookup( ProxyNodeController.class ).lookup( unreportedNode.getName() ) );
NodeInfo removed = clusterNodeMap.remove( unreportedTag );
nodeLog.append( "GONE:" ).append( removed.getName() ).append( ":" ).append( removed.getLastState() ).append( " " );
}
}
/** add new nodes or updated existing node infos **/
Set<NodeInfo> nodesToUpdate = Sets.newHashSet( );
for ( final NodeType node : nodes ) {
try {
String serviceTag = node.getServiceTag( );
if ( newTags.contains( serviceTag ) ) {
// putIfAbsent then get: another thread may have inserted concurrently;
// either way the map's canonical NodeInfo is the one updated below.
clusterNodeMap.putIfAbsent( serviceTag, new NodeInfo( ccConfig.getPartition( ), node ) );
NodeInfo nodeInfo = clusterNodeMap.get( serviceTag );
nodeLog.append( "NEW:" ).append( nodeInfo.getName() ).append( ":" ).append( nodeInfo.getLastState() ).append( " " );
nodesToUpdate.add( nodeInfo );
} else if ( stillKnownTags.contains( serviceTag ) ) {
NodeInfo nodeInfo = clusterNodeMap.get( serviceTag );
nodeInfo.setIqn( node.getIqn( ) );
nodeLog.append( "OLD:" ).append( nodeInfo.getName() ).append( ":" ).append( nodeInfo.getLastState() ).append( " " );
nodesToUpdate.add( nodeInfo );
}
} catch ( NoSuchElementException e ) {
LOG.error( e );
LOG.debug( e, e );
}
}
LOG.debug( "Updated node info map: " + nodeLog.toString() );
try {
Nodes.updateServiceConfiguration( ccConfig, nodesToUpdate );
} catch ( Exception e ) {
// Errors are expected while the CC is not ENABLED; only debug-log them then.
// NOTE(review): when the CC *is* ENABLED the exception is silently dropped —
// confirm this is intentional.
if( !Component.State.ENABLED.apply( ccConfig ))
LOG.debug("Error while updating nodes: " + e.getMessage(), e);
}
}
/**
 * Resolve (and register if needed) the service configuration for the given node.
 *
 * @throws NoSuchElementException when the node cannot be resolved
 */
public static ServiceConfiguration lookup( ServiceConfiguration ccConfig, NodeInfo nodeInfo ) throws NoSuchElementException {
  final Function<NodeInfo, ServiceConfiguration> toConfiguration = Nodes.transformNodeInfo( ccConfig );
  return toConfiguration.apply( nodeInfo );
}
/**
 * Resolve the service configuration for the node identified by host name or
 * service tag within the given cluster controller's cluster.
 *
 * @throws NoSuchElementException when no such node is registered
 */
public static ServiceConfiguration lookup( ServiceConfiguration ccConfig, String hostOrTag ) throws NoSuchElementException {
  final Function<String, ServiceConfiguration> toConfiguration = Nodes.lookupNodeServiceConfiguration( ccConfig );
  return toConfiguration.apply( hostOrTag );
}
/**
 * Reconcile component/service state for the given nodes with the state their
 * cluster controller reports via DescribeServices: register missing node
 * services, then enable/disable/stop each node service to match its reported
 * state, recording the outcome on the NodeInfo and with the Faults subsystem.
 *
 * @param ccConfig    the owning cluster controller's configuration
 * @param nodeInfoSet nodes whose service state should be reconciled
 * @throws NoSuchElementException when a node cannot be resolved to a configuration
 */
private static void updateServiceConfiguration( final ServiceConfiguration ccConfig, Set<NodeInfo> nodeInfoSet ) throws NoSuchElementException {
  // Maps a node to its service configuration; when the CC is ENABLED and its
  // state machine idle, also registers (and initially disables) the service.
  final Function<NodeInfo, ServiceConfiguration> setupNode = new Function<NodeInfo, ServiceConfiguration>( ) {
    @Nullable
    @Override
    public ServiceConfiguration apply( @Nullable NodeInfo input ) {
      if ( Component.State.ENABLED.apply( ccConfig ) && !ccConfig.lookupStateMachine( ).isBusy( ) ) {
        ServiceConfiguration ncConfig = Nodes.lookup( ccConfig, input.getName( ) );
        Component component = Components.lookup( ProxyNodeController.class );
        if ( !component.hasService( ncConfig ) ) {
          component.setup( ncConfig );
          try {
            Topology.disable( ncConfig );
          } catch ( Exception e ) {
            LOG.debug( e, e );
          }
        }
        return ncConfig;
      }
      return Nodes.lookup( ccConfig, input.getName( ) );//GRZE: need to return something in this case, even knowing that the state is unhappy.
    }
  };
  // Best-effort disable of a node's service; failures are non-fatal.
  final Predicate<NodeInfo> disableNodes = new Predicate<NodeInfo>( ) {
    @Override
    public boolean apply( @Nullable NodeInfo nodeInfo ) {
      try {
        Topology.disable( Nodes.lookup( ccConfig, nodeInfo.getName( ) ) );
      } catch ( Exception e ) {
        LOG.debug( e, e );//best-effort: disable failures are expected here
      }
      return true;
    }
  };
  if ( Component.State.DISABLED.ordinal( ) >= ccConfig.lookupState( ).ordinal( ) ) {
    // BUG FIX: Iterables.filter( ) is lazy and its result was discarded, so the
    // disable pass never actually executed. Apply the predicate eagerly instead.
    for ( final NodeInfo nodeInfo : nodeInfoSet ) {
      disableNodes.apply( nodeInfo );
    }
  }
  final Function<ServiceStatusType, String> statusToName = new Function<ServiceStatusType, String>( ) {
    @Nullable
    @Override
    public String apply( @Nullable ServiceStatusType status ) {
      return status.getServiceId( ).getName( );
    }
  };
  // BUG FIX: materialize the transform exactly once. The previous lazy Iterable
  // re-invoked the side-effecting setupNode function on every traversal
  // (once for toArray, again in the for-loop below), repeating setup/disable.
  final List<ServiceConfiguration> nodesConfigs = Lists.newArrayList( Iterables.transform( nodeInfoSet, setupNode ) );
  if ( !nodeInfoSet.isEmpty( ) ) {
    DescribeServicesResponseType reply = Nodes.send( new DescribeServicesType( ), toArray( nodesConfigs, ServiceConfiguration.class ) );
    Map<String, ServiceStatusType> statusMap = Maps.uniqueIndex( reply.getServiceStatuses( ), statusToName );
    Map<String, NodeInfo> nodeInfoMap = Maps.uniqueIndex( nodeInfoSet, new Function<NodeInfo, String>( ) {
      @Nullable
      @Override
      public String apply( @Nullable NodeInfo nodeInfo ) {
        return nodeInfo.getName( );
      }
    } );
    for ( ServiceConfiguration ncConfig : nodesConfigs ) {
      Component.State reportedState = Component.State.ENABLED;
      ServiceStatusType status = statusMap.get( ncConfig.getName( ) );
      final NodeInfo nodeInfo = nodeInfoMap.get( ncConfig.getName( ) );
      String lastMessage = null;
      Faults.CheckException checkException = null;
      TransitionRecord<ServiceConfiguration, Component.State, Component.Transition> tr = null;
      try {
        lastMessage = Joiner.on( "\n" ).join( status.getDetails( ) );
        tr = ncConfig.lookupStateMachine( ).getTransitionRecord( );
        try {
          reportedState = Component.State.valueOf( Strings.nullToEmpty( status.getLocalState( ) ).toUpperCase( ) );
          lastMessage = Joiner.on( '\n' ).join( lastMessage, "Found service status for " + ncConfig.getName( ) + ": " + reportedState );
        } catch ( IllegalArgumentException e ) {
          // Unknown/blank state string: keep the ENABLED default and note it.
          lastMessage = Joiner.on( '\n' ).join( lastMessage, "Failed to get service status for " + ncConfig.getName( ) + "; got " + status.getLocalState( ) );
        }
        if ( ncConfig.lookupStateMachine( ).isBusy( ) ) {
          //GRZE: here we skip the state update to avoid a race in the async dispatch of transitions. log any state mismatch.
          if ( !ncConfig.lookupState( ).equals( reportedState ) ) {
            lastMessage = Joiner.on( '\n' ).join( lastMessage, "Found state mismatch for node " + ncConfig.getName( ) + ": reported=" + reportedState + " local=" + ncConfig.getStateMachine( ) );
          } else {
            lastMessage = Joiner.on( '\n' ).join( lastMessage, "Found state for node " + ncConfig.getName( ) + ": reported=" + reportedState + " local=" + ncConfig.getStateMachine( ) );
          }
        } else {
          try {
            if ( Component.State.ENABLED.equals( reportedState ) ) {
              Topology.enable( ncConfig );
            } else if ( !Component.State.STOPPED.apply( ncConfig ) ) {
              //GRZE: Only attempt to reflect the error state when the service is /not/ in the STOPPED state
              Topology.disable( ncConfig );
              if ( Component.State.NOTREADY.equals( reportedState ) ) {
                checkException = Faults.failure( ncConfig, Joiner.on( "," ).join( status.getDetails( ) ) );
              }
            } else {
              Topology.stop( ncConfig );
            }
          } catch ( Exception e ) {
            LOG.debug( e, e );
            if ( checkException != null ) {
              LOG.debug( checkException );
            }
            checkException = Faults.failure( ncConfig, e, Objects.firstNonNull( checkException, Faults.advisory( ncConfig, lastMessage ) ) );
          }
        }
      } finally {
        // Always record an outcome, even when no explicit failure was raised.
        checkException = Objects.firstNonNull( checkException, Faults.advisory( ncConfig, lastMessage ) );
        nodeInfo.touch( reportedState, lastMessage, checkException );
        Faults.submit( ncConfig, tr, checkException );
      }
    }
  }
}
/**
 * Send a service-transition message for the given configurations to the
 * cluster controller that owns their partition, waiting synchronously for the
 * reply.
 *
 * @throws RuntimeException wrapping any dispatch failure, or a
 *         NoSuchElementException when the cluster controller is not ENABLED
 */
static <T extends BaseMessage> T send( ServiceTransitionType msg, ServiceConfiguration... configsArr ) throws RuntimeException {
  final ServiceConfiguration ccConfig = Topology.lookup( ClusterController.class, configsArr[0].lookupPartition( ) );
  if ( !Component.State.ENABLED.apply( ccConfig ) ) {
    throw Exceptions.noSuchElement( "Failed to find cluster controller: " + ccConfig );
  }
  // EUCA-10136: deliberately no isBusy( ) guard here -- it introduced a race
  // that could prevent the message from ever being sent.
  Iterables.addAll( msg.getServices( ),
                    Iterables.transform( Arrays.asList( configsArr ),
                                         ServiceConfigurations.ServiceConfigurationToServiceId.INSTANCE ) );
  try {
    return AsyncRequests.sendSync( ccConfig, msg );//GRZE: this call site is synchronous wrt other CC-bound requests.
  } catch ( Exception ex ) {
    throw Exceptions.toUndeclared( ex );
  }
}
/**
 * Find the cluster controller registered (via Topology) for the partition
 * owning the given service configuration.
 */
static ServiceConfiguration lookupClusterController( ServiceConfiguration config ) {
return Topology.lookup( ClusterController.class, config.lookupPartition( ) );
}
/**
 * Asynchronously mark every node of the given cluster NOTREADY after a
 * cluster-level failure, by queueing a {@link CleanupNodes} task on the
 * ProxyNodeController worker threads.
 *
 * @param cluster the failed cluster
 * @param e       the failure that triggered the cleanup
 */
public static void clusterCleanup( Cluster cluster, Exception e ) {
Threads.enqueue( ProxyNodeController.class, CleanupNodes.class, new CleanupNodes( cluster, e ) );
}
/**
 * Task that marks every node of a failed cluster NOTREADY, records the failure
 * with the Faults subsystem, and disables any node service still ENABLED.
 * Enqueued by {@code Nodes.clusterCleanup}.
 */
public static class CleanupNodes implements Callable<Collection<NodeInfo>> {
  private final Cluster cluster;
  // Renamed from 'e': the old name was shadowed by the inner catch variable.
  private final Exception cause;

  private CleanupNodes( Cluster cluster, Exception e ) {
    this.cluster = cluster;
    this.cause = e;
  }

  /**
   * Touch each node NOTREADY with the failure message and best-effort disable
   * its service; per-node errors are logged and do not abort the sweep.
   *
   * @return the cluster's (live) node collection
   */
  @Override
  public Collection<NodeInfo> call() throws Exception {
    ServiceConfiguration config = this.cluster.getConfiguration();
    final ConcurrentNavigableMap<String, NodeInfo> nodeMap = this.cluster.getNodeMap();
    for ( NodeInfo nodeInfo : nodeMap.values() ) {
      try {
        ServiceConfiguration ncConfig = lookup( config, nodeInfo );
        // BUG FIX: Joiner rejects nulls; Exception.getMessage( ) (and possibly
        // getLastMessage( )) may be null, which previously threw NPE here and
        // silently skipped this node's NOTREADY bookkeeping.
        String lastMessage = Joiner.on( '\n' ).join( Strings.nullToEmpty( nodeInfo.getLastMessage() ),
                                                     Strings.nullToEmpty( this.cause.getMessage() ) );
        Faults.CheckException ex = Faults.failure( ncConfig, lastMessage );
        nodeInfo.touch( Component.State.NOTREADY, lastMessage, ex );
        if ( Component.State.ENABLED.apply( ncConfig ) ) {
          Topology.disable( ncConfig );
        }
      } catch ( Exception ex ) {
        LOG.debug( ex, ex );//include stack trace; previously only the message was logged
      }
    }
    return nodeMap.values();
  }
}
/** Resolve the Cluster (enabled or disabled) backing the given CC configuration. */
private static Cluster lookupAny( final ServiceConfiguration clusterConfig ) {
  final String clusterName = clusterConfig.getName( );
  return lookupAny( clusterName );
}
/**
 * Resolve a cluster by name, first among enabled clusters and, failing that,
 * among disabled ones.
 *
 * @throws NoSuchElementException when the name matches neither registry
 */
private static Cluster lookupAny( final String name ) {
try {
return registry( ).lookup( name );
} catch ( final NoSuchElementException ex ) {
// Not enabled; fall back to the disabled-cluster registry.
return registry( ).lookupDisabled( name );
}
}
/** Accessor for the singleton cluster registry used by the lookup helpers. */
private static ClusterRegistry registry( ) {
return ClusterRegistry.getInstance( );
}
}