/*************************************************************************
* Copyright 2009-2014 Eucalyptus Systems, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
* Please contact Eucalyptus Systems, Inc., 6755 Hollister Ave., Goleta
* CA 93117, USA or visit http://www.eucalyptus.com/licenses/ if you need
* additional information or have any questions.
************************************************************************/
package com.eucalyptus.imaging.backend;
import java.util.Calendar;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import com.eucalyptus.auth.Accounts;
import com.eucalyptus.auth.principal.AccountIdentifiers;
import com.eucalyptus.bootstrap.Bootstrap;
import com.eucalyptus.component.Topology;
import com.eucalyptus.compute.common.Compute;
import com.eucalyptus.compute.common.ResourceTag;
import com.eucalyptus.compute.common.RunningInstancesItemType;
import com.eucalyptus.entities.Entities;
import com.eucalyptus.entities.TransactionResource;
import com.eucalyptus.event.ClockTick;
import com.eucalyptus.event.EventListener;
import com.eucalyptus.event.Listeners;
import com.eucalyptus.imaging.ImagingServiceProperties;
import com.eucalyptus.imaging.common.ImagingBackend;
import com.eucalyptus.resources.client.Ec2Client;
import com.eucalyptus.util.Exceptions;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
/**
 * Static helpers that track imaging worker instances: lookup, creation and
 * removal of {@link ImagingWorker} records, verification of the instance a
 * request claims to come from, and the periodic health check that retires,
 * decommissions and finally forgets workers that stopped reporting.
 *
 * @author Sang-Min Park
 */
public class ImagingWorkers {
  private static final Logger LOG = Logger.getLogger( ImagingWorkers.class );

  /** Minutes without an update after which a worker is considered timed out. */
  public static final int WORKER_TIMEOUT_MIN = 10;

  /** Minutes after the last update at which a decommissioned worker record is removed. */
  private static final int WORKER_REMOVAL_DELAY_MIN = 60;

  /** One minute in milliseconds. */
  private static final long MINUTE = 60L * 1000L;

  /** Tag value that an instance must carry to be accepted by {@link #verifyWorker}. */
  private static final String DEFAULT_LAUNCHER_TAG = "euca-internal-imaging-workers";

  /** Accepts instance IDs with 8 or 17 hexadecimal digits after the "i-" prefix. */
  private static final Pattern INSTANCE_ID = Pattern.compile( "i-[0-9a-fA-F]{8}(?:[0-9a-fA-F]{9})?" );

  /** Task error codes reported as fatal by {@link #isFatalError}. */
  private static final Set<String> FATAL_TASK_ERRORS = Sets.newHashSet( "FailureToAttachVolume",
      "FailureToDetachVolume", "CertificateFailure" );

  /**
   * Instance IDs that already passed {@link #verifyWorker}; entries are never evicted.
   * NOTE(review): plain HashSet with no synchronization - confirm whether
   * verifyWorker can be invoked from several threads at once.
   */
  private static final Set<String> verifiedWorkers = new HashSet<String>( );

  /**
   * Clock-tick listener that checks worker health: workers that have not
   * reported for {@link #WORKER_TIMEOUT_MIN} minutes are retired, retiring
   * workers have their converting task re-queued and are decommissioned, and
   * decommissioned workers are forgotten once their removal deadline passed.
   */
  public static class ImagingWorkerStateManager implements EventListener<ClockTick> {
    /** Registers a new instance of this listener for {@link ClockTick} events. */
    public static void register( ) {
      Listeners.register( ClockTick.class, new ImagingWorkerStateManager( ) );
    }

    /**
     * Runs one health-check pass. Does nothing unless the system is
     * operational, the imaging backend is enabled locally, compute is enabled
     * and the health check property is switched on. Failures are logged and
     * never propagate to the caller.
     */
    @Override
    public void fireEvent( final ClockTick event ) {
      if ( !( Bootstrap.isOperational( ) &&
              Topology.isEnabledLocally( ImagingBackend.class ) &&
              Topology.isEnabled( Compute.class ) ) ) {
        return;
      }
      if ( !ImagingServiceProperties.HEALTHCHECK ) {
        return;
      }

      try {
        // Classify from a single snapshot; a worker may land in several lists.
        // NOTE(review): the timed-out check ignores the worker state, so a
        // DECOMMISSIONED worker is handed to retireWorker again - confirm intended.
        final List<ImagingWorker> timedout = Lists.newArrayList( );
        final List<ImagingWorker> retiring = Lists.newArrayList( );
        final List<ImagingWorker> toRemove = Lists.newArrayList( );
        for ( final ImagingWorker worker : listWorkers( ) ) {
          if ( isTimedOut( worker ) ) {
            timedout.add( worker );
          }
          if ( ImagingWorker.STATE.RETIRING.equals( worker.getState( ) ) ) {
            retiring.add( worker );
          }
          if ( ImagingWorker.STATE.DECOMMISSIONED.equals( worker.getState( ) ) && timeToRemove( worker ) <= 0 ) {
            toRemove.add( worker );
          }
        }

        // Each worker is handled in its own try block so that one failure
        // does not prevent the remaining workers from being processed.
        for ( final ImagingWorker worker : timedout ) {
          final String workerId = worker.getDisplayName( );
          try {
            // timeToRemove is negative once the deadline passed; report 0 then.
            LOG.info( String.format( "Imaging service worker %s is not responding and might be "
                + "decommissioned in about %d minutes.", workerId, Math.max( 0, timeToRemove( worker ) ) ) );
            retireWorker( workerId );
          } catch ( final Exception ex ) {
            LOG.error( "Failed to retire imaging worker " + workerId, ex );
          }
        }

        for ( final ImagingWorker worker : retiring ) {
          final String workerId = worker.getDisplayName( );
          try {
            final ImagingTask task = ImagingTasks.getConvertingTaskByWorkerId( workerId );
            if ( task != null && ImportTaskState.CONVERTING.equals( task.getState( ) ) ) {
              ImagingTasks.killAndRerunTask( task.getDisplayName( ) );
              LOG.debug( String.format( "Imaging worker task %s is moved back into queue", task.getDisplayName( ) ) );
            }
            // retireWorker above may already have deleted the record.
            if ( hasWorker( workerId ) ) {
              decommisionWorker( workerId );
            }
          } catch ( final Exception ex ) {
            LOG.error( "Failed to decommission imaging worker " + workerId, ex );
          }
        }

        for ( final ImagingWorker worker : toRemove ) {
          final String workerId = worker.getDisplayName( );
          try {
            // retireWorker above may already have deleted the record.
            if ( !hasWorker( workerId ) ) {
              continue;
            }
            LOG.debug( "Forgetting about imaging worker " + workerId );
            removeWorker( workerId );
          } catch ( final Exception ex ) {
            LOG.error( "Failed to remove imaging worker " + workerId, ex );
          }
        }
      } catch ( final Exception ex ) {
        LOG.error( "Failed to check imaging worker's state", ex );
      }
    }
  }

  /**
   * Returns the whole minutes left until the worker record is due for removal,
   * i.e. {@link #WORKER_REMOVAL_DELAY_MIN} minutes after its last update.
   * The value is zero or negative once the deadline has passed, so callers can
   * test {@code <= 0} at any later time.
   */
  private static long timeToRemove( final ImagingWorker worker ) {
    final Date lastUpdated = worker.getWorkerUpdateTime( );
    final long expirationTime = lastUpdated.getTime( ) + WORKER_REMOVAL_DELAY_MIN * MINUTE;
    return ( expirationTime - System.currentTimeMillis( ) ) / MINUTE;
  }

  /**
   * Returns true when more than {@link #WORKER_TIMEOUT_MIN} minutes have
   * elapsed since the worker's last update time.
   */
  private static boolean isTimedOut( final ImagingWorker worker ) {
    final Date lastUpdated = worker.getWorkerUpdateTime( );
    final long expirationTime = lastUpdated.getTime( ) + WORKER_TIMEOUT_MIN * MINUTE;
    return expirationTime < System.currentTimeMillis( );
  }

  /**
   * Describes the given instance on behalf of the imaging system account.
   *
   * @param instanceId ID of the instance to describe
   * @return the instances returned by the describe call
   * @throws Exception if the account lookup or the describe call fails
   */
  private static List<RunningInstancesItemType> describeInstance( final String instanceId ) throws Exception {
    return Ec2Client.getInstance( ).describeInstances(
        Accounts.lookupSystemAccountByAlias( AccountIdentifiers.IMAGING_SYSTEM_ACCOUNT ).getUserId( ),
        Lists.newArrayList( instanceId ) );
  }

  /**
   * Tells whether a worker record with the given ID can be looked up.
   *
   * @return false when the lookup fails for any reason, true otherwise
   */
  public static boolean hasWorker( final String workerId ) {
    try ( final TransactionResource db =
        Entities.transactionFor( ImagingWorker.class ) ) {
      try {
        Entities.uniqueResult( ImagingWorker.named( workerId ) );
      } catch ( final Exception ex ) {
        return false;
      }
      return true;
    }
  }

  /**
   * Tells whether work may be given to the worker.
   *
   * @return true only when the worker record exists and is in RUNNING state
   */
  public static boolean canAllocate( final String workerId ) {
    final ImagingWorker worker = getWorker( workerId );
    if ( worker == null ) {
      return false;
    }
    return ImagingWorker.STATE.RUNNING.equals( worker.getState( ) );
  }

  /**
   * Looks up the worker record with the given ID.
   *
   * @return the record, or null when the lookup fails for any reason
   */
  public static ImagingWorker getWorker( final String workerId ) {
    try ( final TransactionResource db =
        Entities.transactionFor( ImagingWorker.class ) ) {
      try {
        return Entities.uniqueResult( ImagingWorker.named( workerId ) );
      } catch ( final Exception ex ) {
        return null;
      }
    }
  }

  /**
   * Returns the records matched by the {@code ImagingWorker.named()} example.
   * Query failures are rethrown via {@code Exceptions.toUndeclared}.
   */
  public static List<ImagingWorker> listWorkers( ) {
    try ( final TransactionResource db =
        Entities.transactionFor( ImagingWorker.class ) ) {
      try {
        return Entities.query( ImagingWorker.named( ) );
      } catch ( final Exception ex ) {
        throw Exceptions.toUndeclared( ex );
      }
    }
  }

  /**
   * Verifies that a request claiming to come from the given instance is
   * legitimate: the ID must look like an instance ID, the instance must carry
   * the imaging worker launcher tag, and the remote host must match the
   * instance's public or private IP address. Successfully verified IDs are
   * cached and not checked again.
   *
   * @param instanceId instance ID the caller claims to be
   * @param remoteHost address the request came from
   * @throws Exception if any of the checks fails
   */
  public static void verifyWorker( final String instanceId, final String remoteHost ) throws Exception {
    if ( instanceId == null || !INSTANCE_ID.matcher( instanceId ).matches( ) ) {
      throw new Exception( "Failed to verify imaging worker. The '" + instanceId + "' can't be an instance ID" );
    }
    if ( verifiedWorkers.contains( instanceId ) ) {
      return;
    }
    try {
      final List<RunningInstancesItemType> instances = describeInstance( instanceId );
      if ( instances == null || instances.isEmpty( ) ) {
        throw new Exception( "Unable to find the instance named: " + instanceId );
      }
      final RunningInstancesItemType workerInstance = instances.get( 0 );
      boolean tagFound = false;
      for ( final ResourceTag tag : workerInstance.getTagSet( ) ) {
        if ( DEFAULT_LAUNCHER_TAG.equals( tag.getValue( ) ) ) {
          tagFound = true;
          break;
        }
      }
      if ( !tagFound ) {
        throw new Exception( "Instance does not have a proper tag" );
      }
      final boolean hostMatches = remoteHost != null
          && ( remoteHost.equals( workerInstance.getIpAddress( ) )
               || remoteHost.equals( workerInstance.getPrivateIpAddress( ) ) );
      if ( !hostMatches ) {
        throw new Exception( "Request came from invalid host address: " + remoteHost );
      }
      verifiedWorkers.add( instanceId );
    } catch ( final Exception ex ) {
      // Every failure, including the checks above, is reported under one message.
      throw new Exception( "Failed to verify imaging worker", ex );
    }
  }

  /**
   * Creates and persists a RUNNING worker record for the given instance,
   * recording the instance's placement as the availability zone.
   *
   * @param workerId instance ID of the worker
   * @return the newly persisted record
   * @throws RuntimeException if the instance cannot be described, a record
   *         with that ID already exists, or persisting fails
   */
  public static ImagingWorker createWorker( final String workerId ) {
    final String availabilityZone;
    try {
      availabilityZone = describeInstance( workerId ).get( 0 ).getPlacement( );
    } catch ( final Exception ex ) {
      // The thrown exception carries no cause, so keep the reason in the log.
      LOG.debug( "Failed to describe instance " + workerId, ex );
      throw Exceptions.toUndeclared( "Unable to find the instance named: " + workerId );
    }

    try ( final TransactionResource db =
        Entities.transactionFor( ImagingWorker.class ) ) {
      try {
        // A successful lookup means the record exists; the normal path is the
        // NoSuchElementException branch below.
        Entities.uniqueResult( ImagingWorker.named( workerId ) );
        throw Exceptions.toUndeclared( new Exception( "Worker already exists" ) );
      } catch ( final NoSuchElementException ex ) {
        final ImagingWorker worker = new ImagingWorker( ImagingWorker.STATE.RUNNING, workerId );
        worker.setWorkerUpdateTime( );
        worker.setAvailabilityZone( availabilityZone );
        Entities.persist( worker );
        db.commit( );
        return worker;
      } catch ( final Exception ex ) {
        throw Exceptions.toUndeclared( ex );
      }
    }
  }

  /**
   * Deletes the worker record with the given ID.
   *
   * @throws RuntimeException if the record cannot be found or deleted
   */
  public static void removeWorker( final String workerId ) {
    try ( final TransactionResource db =
        Entities.transactionFor( ImagingWorker.class ) ) {
      try {
        final ImagingWorker entity = Entities.uniqueResult( ImagingWorker.named( workerId ) );
        Entities.delete( entity );
        db.commit( );
      } catch ( final Exception ex ) {
        throw Exceptions.toUndeclared( ex );
      }
    }
  }

  /**
   * Refreshes the update time of the worker record with the given ID.
   *
   * @throws RuntimeException if the record cannot be found or saved
   */
  public static void markUpdate( final String workerId ) {
    try ( final TransactionResource db =
        Entities.transactionFor( ImagingWorker.class ) ) {
      try {
        final ImagingWorker entity = Entities.uniqueResult( ImagingWorker.named( workerId ) );
        entity.setWorkerUpdateTime( );
        Entities.persist( entity );
        db.commit( );
      } catch ( final Exception ex ) {
        throw Exceptions.toUndeclared( ex );
      }
    }
  }

  /**
   * Stores a new state on the worker record with the given ID.
   *
   * @throws RuntimeException if the record cannot be found or saved
   */
  private static void setWorkerState( final String workerId, final ImagingWorker.STATE state ) {
    try ( final TransactionResource db =
        Entities.transactionFor( ImagingWorker.class ) ) {
      try {
        final ImagingWorker entity = Entities.uniqueResult( ImagingWorker.named( workerId ) );
        entity.setState( state );
        Entities.persist( entity );
        db.commit( );
      } catch ( final Exception ex ) {
        throw Exceptions.toUndeclared( ex );
      }
    }
  }

  /**
   * Marks the worker as RETIRING when exactly one matching instance is
   * described; otherwise (no such instance, or the describe call failed) the
   * worker record is deleted.
   *
   * @throws RuntimeException if the record cannot be updated or deleted
   */
  public static void retireWorker( final String workerId ) {
    List<RunningInstancesItemType> instances = null;
    try {
      instances = describeInstance( workerId );
    } catch ( final Exception ex ) {
      LOG.error( "Can't list instances", ex );
    }

    if ( instances != null && instances.size( ) == 1 ) {
      setWorkerState( workerId, ImagingWorker.STATE.RETIRING );
    } else {
      LOG.debug( "Forgetting about imaging worker " + workerId );
      removeWorker( workerId );
    }
  }

  /**
   * Terminates the worker's instance, when exactly one matching instance is
   * described, and then marks the worker record DECOMMISSIONED.
   *
   * @throws RuntimeException if termination or the state update fails
   */
  private static void decommisionWorker( final String workerId ) {
    String instanceId = null;
    try {
      final List<RunningInstancesItemType> instances = describeInstance( workerId );
      if ( instances != null && instances.size( ) == 1 ) {
        instanceId = instances.get( 0 ).getInstanceId( );
      }
    } catch ( final Exception ex ) {
      // A failed describe only skips termination; the state is still updated.
      LOG.error( "Can't list instances", ex );
    }

    if ( instanceId != null ) {
      try {
        Ec2Client.getInstance( ).terminateInstances(
            Accounts.lookupSystemAccountByAlias( AccountIdentifiers.IMAGING_SYSTEM_ACCOUNT ).getUserId( ),
            Lists.newArrayList( instanceId ) );
        LOG.debug( "Terminated imaging worker: " + workerId );
      } catch ( final Exception ex ) {
        throw Exceptions.toUndeclared( ex );
      }
    }
    setWorkerState( workerId, ImagingWorker.STATE.DECOMMISSIONED );
  }

  /**
   * Tells whether the given task error code is one of the fatal error codes.
   *
   * @param errorCode error code reported for a task
   * @return true when the code is in the fatal error set
   */
  public static boolean isFatalError( final String errorCode ) {
    return FATAL_TASK_ERRORS.contains( errorCode );
  }
}