// AdmissionControl.java example
//
// Explorer: eucalyptus-master / clc / modules
/*************************************************************************
 * Copyright 2009-2016 Eucalyptus Systems, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see http://www.gnu.org/licenses/.
 *
 * Please contact Eucalyptus Systems, Inc., 6755 Hollister Ave., Goleta
 * CA 93117, USA or visit http://www.eucalyptus.com/licenses/ if you need
 * additional information or have any questions.
 *
 * This file may incorporate work covered under the following copyright
 * and permission notice:
 *
 *   Software License Agreement (BSD License)
 *
 *   Copyright (c) 2008, Regents of the University of California
 *   All rights reserved.
 *
 *   Redistribution and use of this software in source and binary forms,
 *   with or without modification, are permitted provided that the
 *   following conditions are met:
 *
 *     Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *     Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer
 *     in the documentation and/or other materials provided with the
 *     distribution.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *   COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *   BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *   POSSIBILITY OF SUCH DAMAGE. USERS OF THIS SOFTWARE ACKNOWLEDGE
 *   THE POSSIBLE PRESENCE OF OTHER OPEN SOURCE LICENSED MATERIAL,
 *   COPYRIGHTED MATERIAL OR PATENTED MATERIAL IN THIS SOFTWARE,
 *   AND IF ANY SUCH MATERIAL IS DISCOVERED THE PARTY DISCOVERING
 *   IT MAY INFORM DR. RICH WOLSKI AT THE UNIVERSITY OF CALIFORNIA,
 *   SANTA BARBARA WHO WILL THEN ASCERTAIN THE MOST APPROPRIATE REMEDY,
 *   WHICH IN THE REGENTS' DISCRETION MAY INCLUDE, WITHOUT LIMITATION,
 *   REPLACEMENT OF THE CODE SO IDENTIFIED, LICENSING OF THE CODE SO
 *   IDENTIFIED, OR WITHDRAWAL OF THE CODE CAPABILITY TO THE EXTENT
 *   NEEDED TO COMPLY WITH ANY SUCH LICENSES OR RIGHTS.
 ************************************************************************/

package com.eucalyptus.cloud.run;

import static com.eucalyptus.cloud.VmInstanceLifecycleHelpers.NetworkResourceVmInstanceLifecycleHelper;
import static com.eucalyptus.util.RestrictedTypes.BatchAllocator;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import javax.annotation.Nullable;
import javax.persistence.EntityTransaction;

import com.eucalyptus.auth.AuthException;
import com.eucalyptus.cloud.VmInstanceToken;
import com.eucalyptus.cluster.Clusters;
import com.eucalyptus.compute.common.CloudMetadataLimitedType;
import com.eucalyptus.compute.common.internal.vmtypes.VmType;
import com.google.common.base.Function;
import org.apache.log4j.Logger;
import com.eucalyptus.blockstorage.Storage;
import com.eucalyptus.cloud.VmInstanceLifecycleHelpers;
import com.eucalyptus.cloud.VmInstanceLifecycleHelper;
import com.eucalyptus.cloud.run.Allocations.Allocation;
import com.eucalyptus.compute.common.internal.util.IllegalMetadataAccessException;
import com.eucalyptus.compute.common.internal.util.NotEnoughResourcesException;
import com.eucalyptus.cluster.common.internal.Cluster;
import com.eucalyptus.cluster.common.internal.ResourceState;
import com.eucalyptus.cluster.common.internal.ResourceState.VmTypeAvailability;
import com.eucalyptus.component.Partition;
import com.eucalyptus.component.Partitions;
import com.eucalyptus.component.ServiceConfiguration;
import com.eucalyptus.component.Topology;
import com.eucalyptus.cluster.common.ClusterController;
import com.eucalyptus.compute.common.CloudMetadata;
import com.eucalyptus.compute.common.network.DnsHostNamesFeature;
import com.eucalyptus.compute.common.network.NetworkFeature;
import com.eucalyptus.compute.common.network.NetworkResource;
import com.eucalyptus.compute.common.network.Networking;
import com.eucalyptus.compute.common.network.PrepareNetworkResourcesResultType;
import com.eucalyptus.compute.common.network.PrepareNetworkResourcesType;
import com.eucalyptus.context.ServiceStateException;
import com.eucalyptus.entities.Entities;
import com.eucalyptus.compute.common.internal.images.BlockStorageImageInfo;
import com.eucalyptus.compute.common.internal.network.NetworkGroup;
import com.eucalyptus.records.EventRecord;
import com.eucalyptus.records.EventType;
import com.eucalyptus.records.Logs;
import com.eucalyptus.scripting.ScriptExecutionFailedException;
import com.eucalyptus.util.EucalyptusCloudException;
import com.eucalyptus.util.Exceptions;
import com.eucalyptus.util.HasName;
import com.eucalyptus.util.LogUtil;
import com.eucalyptus.util.RestrictedTypes;
import com.eucalyptus.compute.common.internal.vm.VmInstance;
import com.eucalyptus.vmtypes.VmTypes;
import com.google.common.base.MoreObjects;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;

public class AdmissionControl {
  /** Class-wide logger; it is never reassigned, so it is declared final. */
  private static final Logger LOG = Logger.getLogger( AdmissionControl.class );

  /**
   * Returns the admission-control predicate used when running instances.
   *
   * <p>The predicate runs every entry of {@code allocators} (node resources, then networking)
   * and returns {@code true} on success; on failure it throws a
   * {@link NotEnoughResourcesException} wrapped by {@code Exceptions.toUndeclared}.
   *
   * @return the shared {@link RunAdmissionControl} instance
   */
  public static Predicate<Allocation> run( ) {
    return RunAdmissionControl.INSTANCE;
  }

  /**
   * Returns the admission-control predicate used when restoring instances.
   *
   * <p>The predicate runs every entry of {@code restorers} (networking only) and otherwise
   * behaves like {@link #run()}.
   *
   * @return the shared {@link Restore} instance
   */
  public static Predicate<Allocation> restore( ) {
    return Restore.INSTANCE;
  }

  /**
   * Admission control for a run request: applies every allocator in {@code allocators}, in
   * order, inside a single entity transaction.
   */
  enum RunAdmissionControl implements Predicate<Allocation> {
    INSTANCE;

    /**
     * Runs all allocators against the given allocation.
     *
     * @param allocInfo the allocation being admitted
     * @return {@code true} when every allocator succeeded and the transaction committed
     * @throws RuntimeException (undeclared wrapper around {@link NotEnoughResourcesException})
     *         when any allocator or the commit fails; allocators that had already finished
     *         are notified through {@code rollbackAllocations} first
     */
    @Override
    public boolean apply( Allocation allocInfo ) {
      if ( EventRecord.isTraceEnabled( AdmissionControl.class ) ) {
        EventRecord.here( AdmissionControl.class, EventType.VM_RESERVED, LogUtil.dumpObject( allocInfo ) ).trace( );
      }
      final List<ResourceAllocator> finished = Lists.newArrayList( );
      final EntityTransaction db = Entities.get( NetworkGroup.class );
      try {
        for ( ResourceAllocator allocator : allocators ) {
          runAllocatorSafely( allocInfo, allocator );
          finished.add( allocator );
        }
        db.commit( );
        return true;
      } catch ( Exception ex ) {
        Logs.exhaust( ).error( ex, ex );
        rollbackAllocations( allocInfo, finished, ex );
        throw Exceptions.toUndeclared( new NotEnoughResourcesException( Exceptions.getCauseMessage( ex ), ex ) );
      } finally {
        // Roll back on every exit path that left the transaction open (including Errors, which
        // the catch above does not see); skipped after a successful or already-failed commit.
        if ( db.isActive( ) ) {
          db.rollback( );
        }
      }
    }

  }

  /**
   * Admission control for a restore request: applies every allocator in {@code restorers}, in
   * order, inside a single entity transaction.
   */
  enum Restore implements Predicate<Allocation> {
    INSTANCE;

    /**
     * Runs all restorers against the given allocation.
     *
     * @param allocInfo the allocation being restored
     * @return {@code true} when every restorer succeeded and the transaction committed
     * @throws RuntimeException (undeclared wrapper around {@link NotEnoughResourcesException})
     *         when any restorer or the commit fails; restorers that had already finished are
     *         notified through {@code rollbackAllocations} first
     */
    @Override
    public boolean apply( Allocation allocInfo ) {
      final List<ResourceAllocator> finished = Lists.newArrayList( );
      final EntityTransaction db = Entities.get( NetworkGroup.class );
      try {
        for ( ResourceAllocator allocator : restorers ) {
          runAllocatorSafely( allocInfo, allocator );
          finished.add( allocator );
        }
        db.commit( );
        return true;
      } catch ( Exception ex ) {
        Logs.exhaust( ).error( ex, ex );
        rollbackAllocations( allocInfo, finished, ex );
        throw Exceptions.toUndeclared( new NotEnoughResourcesException( ex.getMessage( ), ex ) );
      } finally {
        // Roll back on every exit path that left the transaction open (including Errors, which
        // the catch above does not see); skipped after a successful or already-failed commit.
        if ( db.isActive( ) ) {
          db.rollback( );
        }
      }
    }
  }

  /**
   * Tells each allocator that had already completed to undo its work, walking the list from
   * the most recently finished allocator back to the first. A failure raised by one
   * allocator's {@code fail} is logged at debug level and does not stop the others.
   *
   * @param allocInfo the allocation being unwound
   * @param finished  allocators that completed successfully, in completion order
   * @param e         the failure that triggered the unwind
   */
  private static void rollbackAllocations( Allocation allocInfo, List<ResourceAllocator> finished, Exception e ) {
    for ( int idx = finished.size( ) - 1; idx >= 0; idx-- ) {
      final ResourceAllocator completed = finished.get( idx );
      try {
        completed.fail( allocInfo, e );
      } catch ( Exception failure ) {
        LOG.debug( failure, failure );
      }
    }
  }
  
  /**
   * Runs a single allocator and, if it fails, gives that same allocator a chance to clean up
   * before the failure is propagated.
   *
   * <p>A {@link ScriptExecutionFailedException} is translated into an
   * {@link EucalyptusCloudException} carrying the underlying cause (or the script exception
   * itself when it has no cause); any other exception is rethrown unchanged. In both cases
   * {@code allocator.fail} is invoked first, so the failing allocator is always notified.
   *
   * @param allocInfo the allocation being processed
   * @param allocator the allocator to run
   * @throws Exception the (possibly translated) failure raised by the allocator
   */
  private static void runAllocatorSafely( Allocation allocInfo, ResourceAllocator allocator ) throws Exception {
    try {
      allocator.allocate( allocInfo );
    } catch ( ScriptExecutionFailedException e ) {
      notifyFailedAllocator( allocInfo, allocator, e );
      if ( e.getCause( ) != null ) {
        throw new EucalyptusCloudException( e.getCause( ).getMessage( ), e.getCause( ) );
      } else {
        throw new EucalyptusCloudException( e.getMessage( ), e );
      }
    } catch ( Exception e ) {
      notifyFailedAllocator( allocInfo, allocator, e );
      throw e;
    }
  }

  /** Logs the failure and calls {@code allocator.fail}, logging rather than propagating any secondary error. */
  private static void notifyFailedAllocator( Allocation allocInfo, ResourceAllocator allocator, Throwable failure ) {
    LOG.debug( failure, failure );
    try {
      allocator.fail( allocInfo, failure );
    } catch ( Exception e1 ) {
      LOG.debug( e1, e1 );
    }
  }
  
  /**
   * One step of admission control: reserves some kind of resource for an allocation and can
   * be told to undo that reservation when the overall admission fails.
   */
  private interface ResourceAllocator {

    /**
     * Reserves this allocator's resources for the given allocation.
     *
     * @param allocInfo the allocation to reserve resources for
     * @throws Exception if the resources cannot be reserved
     */
    void allocate( Allocation allocInfo ) throws Exception;

    /**
     * Notifies this allocator that admission failed.
     *
     * @param allocInfo the allocation that failed
     * @param t         the failure that caused the unwind
     */
    void fail( Allocation allocInfo, Throwable t );

  }
  
  /** Allocators applied, in this order, when running instances: node resources first, then networking. */
  private static final List<ResourceAllocator> allocators = ImmutableList.<ResourceAllocator>builder( )
      .add( NodeResourceAllocator.INSTANCE )
      .add( NetworkingAllocator.INSTANCE )
      .build( );

  /** Allocators applied when restoring instances: networking only. */
  private static final List<ResourceAllocator> restorers = ImmutableList.<ResourceAllocator>builder( )
      .add( NetworkingAllocator.INSTANCE )
      .build( );

  enum NodeResourceAllocator implements ResourceAllocator {
    INSTANCE;
    /**
     * Requests instance tokens from the cluster serving {@code allocInfo}'s partition.
     *
     * <p>The cluster is resolved from the allocation's current partition, and the work is done
     * while holding the read side of the cluster's gate lock (waiting up to 60 seconds for it).
     * Quota for active instances, cpu, memory and disk is charged for {@code max} instances
     * before the node state is asked for between {@code min} and {@code max} tokens; granted
     * tokens are appended to {@code allocInfo.getAllocationTokens()}.
     *
     * @param allocInfo the allocation being serviced
     * @param tryAmount passed as the minimum count to {@code allocateUnitlessResources}
     * @param maxAmount passed as the maximum count to {@code allocateUnitlessResources}
     * @return {@code allocInfo.getAllocationTokens()} -- i.e. the allocation's whole token
     *         list, not just the tokens granted by this call
     * @throws ServiceStateException if the gate lock cannot be acquired within 60 seconds
     * @throws Exception any other failure from lookup, quota checks or resource allocation
     */
    private List<VmInstanceToken> requestResourceToken( final Allocation allocInfo, final int tryAmount, final int maxAmount ) throws Exception {
      ServiceConfiguration config = Topology.lookup( ClusterController.class, allocInfo.getPartition( ) );
      Cluster cluster = Clusters.lookupAny( config );
      /**
       * TODO:GRZE: this is the call path which needs to trigger gating.
       * It shouldn't be handled directly here, but instead be handled in
       * {@link ResourceState#requestResourceAllocation}.
       */
      if ( cluster.getGateLock( ).readLock( ).tryLock( 60, TimeUnit.SECONDS ) ) {
        try {
          final ResourceState state = cluster.getNodeState( );
          /**
           * NOTE: If the defined instance type has an ordering conflict w/ some other type then it
           * isn't safe to service TWO requests which use differing types during the same resource refresh
           * duty cycle.
           * This determines whether or not an asynchronous allocation is safe to do for the
           * request instance type or whether a synchronous resource availability refresh is needed.
           * 
           */
          boolean unorderedType = VmTypes.isUnorderedType( allocInfo.getVmType( ) );
          boolean forceResourceRefresh = state.hasUnorderedTokens( ) || unorderedType;
          /**
           * GRZE: if the vm type is not "nicely" ordered then we force a refresh of the actual
           * cluster state. Note: we already hold the cluster gating lock here so this update will
           * be mutual exclusive wrt both resource allocations and cluster state updates.
           */
          if ( forceResourceRefresh ) {
            cluster.refreshResources( );
          }
          // NOTE(review): the allocator and the quota "marker" functions below are anonymous
          // classes rather than lambdas; RestrictedTypes may rely on their generic type
          // arguments being visible at runtime -- confirm before converting them.
          final BatchAllocator<VmInstanceToken> allocator = new BatchAllocator<VmInstanceToken>( ) {
            @Override
            public List<VmInstanceToken> allocate( int min, int max ) {
              try {
              // do quotas for "active" instances
                RestrictedTypes.allocateMeasurableResource(Long.valueOf(1L*max),
                  new Function<Long, CloudMetadataLimitedType.VmInstanceActiveMetadata>() {
                    @Nullable
                    @Override
                    public CloudMetadataLimitedType.VmInstanceActiveMetadata apply(@Nullable Long amount) {
                      return new CloudMetadataLimitedType.VmInstanceActiveMetadata() {
                      }; // kind of a marker for active instances
                    }
                  });
               // do quotas for instance specific items (cpu, memory, disk)
               // each amount is the per-instance value of the vm type multiplied by max
                RestrictedTypes.allocateMeasurableResource(max * Long.valueOf(allocInfo.getVmType().getCpu().longValue()),
                  new Function<Long, CloudMetadataLimitedType.CpuMetadata>() {
                    @Nullable
                    @Override
                    public CloudMetadataLimitedType.CpuMetadata apply(@Nullable Long amount) {
                      return new CloudMetadataLimitedType.CpuMetadata() {
                      }; // kind of a marker for cpu
                    }
                  });
                RestrictedTypes.allocateMeasurableResource(max * Long.valueOf(allocInfo.getVmType().getMemory().longValue()),
                  new Function<Long, CloudMetadataLimitedType.MemoryMetadata>() {
                    @Nullable
                    @Override
                    public CloudMetadataLimitedType.MemoryMetadata apply(@Nullable Long amount) {
                      return new CloudMetadataLimitedType.MemoryMetadata() {
                      }; // kind of a marker for memory
                    }
                  });
                RestrictedTypes.allocateMeasurableResource(max * Long.valueOf(allocInfo.getVmType().getDisk().longValue()),
                  new Function<Long, CloudMetadataLimitedType.DiskMetadata>() {
                    @Nullable
                    @Override
                    public CloudMetadataLimitedType.DiskMetadata apply(@Nullable Long amount) {
                      return new CloudMetadataLimitedType.DiskMetadata() {
                      }; // kind of a marker for disk
                    }
                  });
                // the supplier numbers the tokens it creates 0, 1, 2, ... within this call
                final List<VmInstanceToken> ret = state.requestResourceAllocation( allocInfo.getVmType( ), min, max, new Supplier<VmInstanceToken>( ) {
                  private int count = 0;
                  @Override
                  public VmInstanceToken get( ) {
                    return new VmInstanceToken( allocInfo, count++ );
                  }
                } );
                // record the granted tokens on the allocation itself
                allocInfo.getAllocationTokens().addAll( ret );
                return ret;
              } catch ( final NotEnoughResourcesException | AuthException e ) {
                // allocate(int,int) declares no checked exceptions, so rethrow undeclared
                throw Exceptions.toUndeclared( e );
              }
            }
          };

          // A Start of exactly one existing instance goes through reallocateUnitlessResource;
          // everything else goes through allocateUnitlessResources.
          if ( allocInfo.getAllocationType( ) == Allocations.AllocationType.Start &&
              maxAmount==1 && allocInfo.getInstanceIds( ).size( ) == 1 ) {
            RestrictedTypes.reallocateUnitlessResource( CloudMetadata.VmInstanceMetadata.class, allocator );
          } else {
            RestrictedTypes.allocateUnitlessResources(
                CloudMetadata.VmInstanceMetadata.class,
                tryAmount,
                maxAmount,
                allocator,
                allocInfo.exampleInstanceResource( maxAmount==1 ) );
          }
          return allocInfo.getAllocationTokens( );
        } finally {
          // always release the gate read lock taken by tryLock above
          cluster.getGateLock( ).readLock( ).unlock( );
        }
      } else {
        throw new ServiceStateException( "Failed to allocate resources in the zone " + cluster.getPartition( ) + ", it is currently locked for maintenance." );
      }
    }
    
    /**
     * Reserves node capacity for the allocation.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>reject requests whose minimum count exceeds the maximum;</li>
     *   <li>look up the clusters the caller may use for the requested zone;</li>
     *   <li>fail fast when their combined availability is below the minimum;</li>
     *   <li>walk the clusters, fixing the partition on the first suitable one when none was
     *       requested, and request tokens from it.</li>
     * </ol>
     * On any failure the allocation is aborted and its partition is reset to the one it had
     * on entry.
     *
     * @param allocInfo the allocation to reserve node resources for
     * @throws IllegalArgumentException if the minimum count is greater than the maximum count
     * @throws IllegalMetadataAccessException if the caller may not create the instance resource
     * @throws NotEnoughResourcesException if capacity (or a storage controller for a
     *         block-storage image) is not available
     * @throws Exception any failure raised by the cluster lookup
     */
    @Override
    public void allocate( Allocation allocInfo ) throws Exception {
      final Partition reqPartition = allocInfo.getPartition( );
      final String zoneName = reqPartition.getName( );
      final VmType vmType = allocInfo.getVmType( );

      /* Validate min and max amount */
      final int minAmount = allocInfo.getMinCount( );
      final int maxAmount = allocInfo.getMaxCount( );
      if ( minAmount > maxAmount ) {
        throw new IllegalArgumentException( "Maximum instance count must not be smaller than minimum instance count" );
      }

      /* Retrieve our context and list of clusters associated with this zone */
      final List<Cluster> authorizedClusters = this.doPrivilegedLookup( zoneName, vmType );

      int remaining = maxAmount;
      int allocated = 0;
      int available;

      LOG.info( "Found authorized clusters: " + Iterables.transform( authorizedClusters, HasName.GET_NAME ) );

      /* Do we have any VM available throughout our clusters? */
      if ( ( available = checkAvailability( vmType, authorizedClusters ) ) < minAmount ) {
        throw new NotEnoughResourcesException( "Not enough resources (" + available + " in " + zoneName + " < " + minAmount + "): vm instances." );
      }

      for ( Cluster cluster : authorizedClusters ) {
        if ( remaining <= 0 ) {
          break;
        }
        final ResourceState state = cluster.getNodeState( );
        final Partition partition = cluster.getConfiguration( ).lookupPartition( );

        /* Has a partition been set if the AZ was not specified? */
        if ( allocInfo.getPartition( ).equals( Partition.DEFAULT ) ) {
          /*
           * Ok, do we have enough slots in this partition to support our request? We should have at least
           * the minimum. The list is sorted in order of resource availability from the cluster with the most
           * available to the cluster with the least amount available. This is why we don't check against the
           * maxAmount value since its a best effort at this point. If we select the partition here and we
           * can't fit maxAmount, based on the sorting order, the next partition will not fit maxAmount anyway.
           */
          final int zoneAvailable = checkZoneAvailability( vmType, partition, authorizedClusters );
          if ( zoneAvailable < minAmount ) {
            continue;
          }

          /* Lets use this partition */
          allocInfo.setPartition( partition );
        } else if ( !allocInfo.getPartition( ).equals( partition ) ) {
          /* We should only pick clusters that are part of the selected AZ */
          continue;
        }

        if ( !RestrictedTypes.filterPrivileged( ).apply( allocInfo.exampleInstanceResource( maxAmount == 1 ) ) ) {
          throw new IllegalMetadataAccessException( "Instance resource denied." );
        }

        if ( allocInfo.getBootSet( ).getMachine( ) instanceof BlockStorageImageInfo ) {
          try {
            Topology.lookup( Storage.class, partition );
          } catch ( Exception ex ) {
            allocInfo.abort( );
            allocInfo.setPartition( reqPartition );
            throw new NotEnoughResourcesException( "Not enough resources: Cannot run EBS instances in partition w/o a storage controller: " + ex.getMessage( ), ex );
          }
        }

        try {
          /* Read the availability once so the comparison and the chosen amount cannot disagree */
          final int clusterAvailable = state.getAvailability( vmType ).getAvailable( );
          final int tryAmount = Math.min( remaining, clusterAvailable );

          /*
           * NOTE(review): requestResourceToken returns allocInfo's whole token list, so these
           * counters are only exact while a single cluster is serviced per request -- confirm.
           */
          final List<VmInstanceToken> tokens = this.requestResourceToken( allocInfo, tryAmount, maxAmount );
          remaining -= tokens.size( );
          allocated += tokens.size( );
        } catch ( Exception t ) {
          LOG.error( t );
          Logs.extreme( ).error( t, t );

          allocInfo.abort( );
          allocInfo.setPartition( reqPartition );

          /* if we still have some allocation remaining AND no more resources are available */
          if ( ( ( available = checkZoneAvailability( vmType, partition, authorizedClusters ) ) < remaining ) && ( remaining > 0 ) ) {
            throw new NotEnoughResourcesException( "Not enough resources (" + available + " in " + zoneName + " < " + minAmount + "): vm instances.", t );
          } else {
            throw new NotEnoughResourcesException( t.getMessage(), t );
          }
        }
      }

      /* Were we able to meet our minimum requirements? */
      if ( ( allocated < minAmount ) && ( remaining > 0 ) ) {
        allocInfo.abort( );
        allocInfo.setPartition( reqPartition );

        if ( reqPartition.equals( Partition.DEFAULT ) ) {
          /* Previously read "... in all zone for N): vm instances." with an unbalanced parenthesis */
          throw new NotEnoughResourcesException( "Not enough resources (no zone can fit " + minAmount + "): vm instances." );
        } else {
          available = checkZoneAvailability( vmType, reqPartition, authorizedClusters );
          throw new NotEnoughResourcesException( "Not enough resources (" + available + " in " + zoneName + " < " + minAmount + "): vm instances." );
        }
      }
    }
    
    /**
     * Adds up how many instances of the given type every listed cluster reports as available,
     * logging each cluster's figure at info level.
     *
     * @param vmType             the instance type to check
     * @param authorizedClusters the clusters to inspect
     * @return the summed availability across all listed clusters
     */
    private int checkAvailability( VmType vmType, List<Cluster> authorizedClusters ) throws NotEnoughResourcesException {
      int total = 0;
      for ( int idx = 0; idx < authorizedClusters.size( ); idx++ ) {
        final Cluster candidate = authorizedClusters.get( idx );
        final VmTypeAvailability slots = candidate.getNodeState( ).getAvailability( vmType );
        total = total + slots.getAvailable( );
        LOG.info( "Availability: " + candidate.getName( ) + " -> " + slots.getAvailable( ) );
      }
      return total;
    }
    
    /**
     * Adds up the availability of the given instance type over the listed clusters that belong
     * to one partition, logging each counted cluster's figure at info level.
     *
     * @param vmType             the instance type to check
     * @param partition          only clusters whose configured partition equals this are counted
     * @param authorizedClusters the clusters to inspect
     * @return the summed availability within the partition
     */
    private int checkZoneAvailability( VmType vmType, Partition partition, List<Cluster> authorizedClusters ) throws NotEnoughResourcesException {
      int zoneTotal = 0;
      for ( final Cluster candidate : authorizedClusters ) {
        final boolean inZone = candidate.getConfiguration( ).lookupPartition( ).equals( partition );
        if ( inZone ) {
          final VmTypeAvailability slots = candidate.getNodeState( ).getAvailability( vmType );
          zoneTotal = zoneTotal + slots.getAvailable( );
          LOG.info( "Availability: " + candidate.getName( ) + " -> " + slots.getAvailable( ) );
        }
      }
      return zoneTotal;
    }
      
    /**
     * Resolves the clusters the caller is allowed to use for a zone.
     *
     * <p>For a named partition the result is the single cluster serving it. For the default
     * partition name the result is every permitted cluster, ordered by the natural ordering of
     * its availability for {@code vmType}.
     *
     * @param partitionName the requested zone name, or {@code Partition.DEFAULT_NAME}
     * @param vmType        the instance type used to order clusters by availability
     * @return a new mutable list of permitted clusters
     * @throws NotEnoughResourcesException if no permitted cluster exists, the named cluster
     *         cannot be found, or the caller is not authorized to use it
     */
    private List<Cluster> doPrivilegedLookup( String partitionName, VmType vmType ) throws NotEnoughResourcesException {
      if ( !Partition.DEFAULT_NAME.equals( partitionName ) ) {
        // Explicit zone: exactly one cluster, which must exist and be permitted.
        final ServiceConfiguration controller = Topology.lookup( ClusterController.class, Partitions.lookupByName( partitionName ) );
        final Cluster named = Clusters.lookupAny( controller );
        if ( named == null ) {
          throw new NotEnoughResourcesException( "Can't find cluster " + partitionName );
        }
        final boolean permitted = RestrictedTypes.filterPrivilegedWithoutOwner( ).apply( named );
        if ( !permitted ) {
          throw new NotEnoughResourcesException( "Not authorized to use cluster " + partitionName );
        }
        return Lists.newArrayList( named );
      }

      // No zone requested: collect every permitted cluster keyed by its availability.
      final Iterable<Cluster> permittedClusters = Clusters.stream( ).filter( RestrictedTypes.filterPrivilegedWithoutOwner( ) );
      final Multimap<VmTypeAvailability, Cluster> byAvailability = TreeMultimap.create( );
      for ( final Cluster candidate : permittedClusters ) {
        byAvailability.put( candidate.getNodeState( ).getAvailability( vmType ), candidate );
      }
      if ( byAvailability.isEmpty( ) ) {
        throw new NotEnoughResourcesException( "Not enough resources: no availability zone is available in which you have permissions to run instances." );
      }
      return Lists.newArrayList( byAvailability.values( ) );
    }
    
    /**
     * Undoes this allocator's work by aborting the allocation; the failure itself is not inspected.
     *
     * @param allocInfo the allocation to abort
     * @param t         the failure that caused the unwind (unused)
     */
    @Override
    public void fail( final Allocation allocInfo, final Throwable t ) {
      allocInfo.abort( );
    }
    
  }
  
  /**
   * Allocator that prepares network resources for an allocation and attaches each prepared
   * resource to the instance token that owns it.
   */
  enum NetworkingAllocator implements ResourceAllocator {
    INSTANCE;

    /**
     * Builds a prepare request for the allocation's zone (with the DNS host names feature),
     * lets the lifecycle helper extend it, submits it, distributes the returned resources to
     * the tokens whose instance id equals the resource's owner id, and finally asks the
     * lifecycle helper to verify the result.
     *
     * @param allocInfo the allocation to prepare networking for
     * @throws Exception the {@link NotEnoughResourcesException} found in the failure's cause
     *         chain when there is one, otherwise the original failure
     */
    @Override
    public void allocate( Allocation allocInfo ) throws Exception {
      try {
        final VmInstanceLifecycleHelper lifecycle = VmInstanceLifecycleHelpers.get( );

        final PrepareNetworkResourcesType prepareRequest = new PrepareNetworkResourcesType( );
        prepareRequest.setAvailabilityZone( allocInfo.getPartition( ).getName( ) );
        prepareRequest.setFeatures( Lists.<NetworkFeature>newArrayList( new DnsHostNamesFeature( ) ) );
        lifecycle.prepareNetworkAllocation( allocInfo, prepareRequest );
        final PrepareNetworkResourcesResultType prepared = Networking.getInstance().prepare( prepareRequest );

        for ( final VmInstanceToken instanceToken : allocInfo.getAllocationTokens( ) ) {
          for ( final NetworkResource prepartedResource : prepared.getResources( ) ) {
            final boolean ownedByToken = instanceToken.getInstanceId( ).equals( prepartedResource.getOwnerId( ) );
            if ( !ownedByToken ) {
              continue;
            }
            instanceToken.getAttribute( NetworkResourceVmInstanceLifecycleHelper.NetworkResourcesKey ).add( prepartedResource );
          }
        }

        lifecycle.verifyNetworkAllocation( allocInfo, prepared );
      } catch ( Exception e ) {
        // Surface a resource shortage directly when one is buried in the cause chain.
        final Exception shortage = Exceptions.findCause( e, NotEnoughResourcesException.class );
        throw ( shortage != null ) ? shortage : e;
      }
    }

    /**
     * Undoes this allocator's work by aborting the allocation; the failure itself is not inspected.
     */
    @Override
    public void fail( Allocation allocInfo, Throwable t ) {
      allocInfo.abort( );
    }
  }
}