/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.brooklyn.entity.brooklynnode.effector; import java.io.File; import java.util.Collection; import java.util.Collections; import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.brooklyn.api.effector.Effector; import org.apache.brooklyn.api.entity.Entity; import org.apache.brooklyn.api.entity.EntitySpec; import org.apache.brooklyn.api.entity.Group; import org.apache.brooklyn.api.mgmt.TaskAdaptable; import org.apache.brooklyn.api.mgmt.ha.HighAvailabilityMode; import org.apache.brooklyn.api.mgmt.ha.ManagementNodeState; import org.apache.brooklyn.core.effector.EffectorBody; import org.apache.brooklyn.core.effector.Effectors; import org.apache.brooklyn.core.entity.Attributes; import org.apache.brooklyn.core.entity.EntityPredicates; import org.apache.brooklyn.core.entity.EntityTasks; import org.apache.brooklyn.core.entity.lifecycle.Lifecycle; import org.apache.brooklyn.entity.brooklynnode.BrooklynCluster; import org.apache.brooklyn.entity.brooklynnode.BrooklynNode; import org.apache.brooklyn.entity.brooklynnode.BrooklynCluster.SelectMasterEffector; import org.apache.brooklyn.entity.brooklynnode.BrooklynCluster.UpgradeClusterEffector; import org.apache.brooklyn.entity.brooklynnode.BrooklynNode.SetHighAvailabilityModeEffector; import org.apache.brooklyn.entity.group.DynamicCluster; import org.apache.brooklyn.util.collections.MutableMap; import org.apache.brooklyn.util.core.config.ConfigBag; import org.apache.brooklyn.util.core.task.DynamicTasks; import org.apache.brooklyn.util.core.task.Tasks; import org.apache.brooklyn.util.net.Urls; import org.apache.brooklyn.util.time.Duration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Preconditions; import com.google.common.base.Predicates; import com.google.common.collect.Collections2; import com.google.common.collect.Iterables; public class BrooklynClusterUpgradeEffectorBody extends EffectorBody<Void> implements UpgradeClusterEffector { private static final Logger log = LoggerFactory.getLogger(BrooklynClusterUpgradeEffectorBody.class); public static final Effector<Void> UPGRADE_CLUSTER = Effectors.effector(UpgradeClusterEffector.UPGRADE_CLUSTER) .impl(new BrooklynClusterUpgradeEffectorBody()).build(); private final AtomicBoolean upgradeInProgress = new AtomicBoolean(); @Override public Void call(ConfigBag parameters) { if (!upgradeInProgress.compareAndSet(false, true)) { throw new IllegalStateException("An upgrade is already in progress."); } EntitySpec<?> origMemberSpec = entity().getConfig(BrooklynCluster.MEMBER_SPEC); Preconditions.checkNotNull(origMemberSpec, BrooklynCluster.MEMBER_SPEC.getName() + " is required for " + UpgradeClusterEffector.class.getName()); log.debug("Upgrading "+entity()+", changing "+BrooklynCluster.MEMBER_SPEC+" from "+origMemberSpec+" / "+origMemberSpec.getConfig()); boolean success = false; try { String newDownloadUrl = parameters.get(DOWNLOAD_URL); EntitySpec<?> newMemberSpec = EntitySpec.create(origMemberSpec); ConfigBag newConfig = ConfigBag.newInstance(); newConfig.putIfNotNull(DOWNLOAD_URL, newDownloadUrl); newConfig.put(BrooklynNode.DISTRO_UPLOAD_URL, inferUploadUrl(newDownloadUrl)); newConfig.putAll(ConfigBag.newInstance(parameters.get(EXTRA_CONFIG)).getAllConfigAsConfigKeyMap()); newMemberSpec.configure(newConfig.getAllConfigAsConfigKeyMap()); entity().config().set(BrooklynCluster.MEMBER_SPEC, newMemberSpec); log.debug("Upgrading "+entity()+", new "+BrooklynCluster.MEMBER_SPEC+": "+newMemberSpec+" / "+newMemberSpec.getConfig()+" (adding: "+newConfig+")"); upgrade(parameters); success = true; } finally { if (!success) { log.debug("Upgrading "+entity()+" failed, will rethrow after restoring "+BrooklynCluster.MEMBER_SPEC+" to: "+origMemberSpec); entity().config().set(BrooklynCluster.MEMBER_SPEC, origMemberSpec); } upgradeInProgress.set(false); } return null; } private String inferUploadUrl(String newDownloadUrl) { if (newDownloadUrl==null) return null; boolean isLocal = "file".equals(Urls.getProtocol(newDownloadUrl)) || new File(newDownloadUrl).exists(); if (isLocal) { return newDownloadUrl; } else { return null; } } protected void upgrade(ConfigBag parameters) { //TODO currently this will fight with auto-scaler policies; they must be turned off for upgrade to work Group cluster = (Group)entity(); Collection<Entity> initialMembers = cluster.getMembers(); int initialClusterSize = initialMembers.size(); if (!BrooklynNodeUpgradeEffectorBody.isPersistenceModeEnabled(cluster)) { // would could try a `forcePersistNow`, but that's sloppy; // for now, require HA/persistence for upgrading DynamicTasks.queue( Tasks.warning("Check persistence", new IllegalStateException("Persistence does not appear to be enabled at this cluster. " + "Cluster upgrade will not succeed unless a custom launch script enables it.")) ); } //TODO we'd like to disable these nodes as standby targets, ie in some 'hot standby but not available for failover' mode //currently if failover happens to a new node, assumptions below may fail and the cluster may require manual repair //1. Initially create a single node to check if it will launch successfully TaskAdaptable<Collection<Entity>> initialNodeTask = DynamicTasks.queue(newCreateNodesTask(1, "Creating first upgraded version node")); //2. If everything is OK with the first node launch the rest as well @SuppressWarnings("unused") TaskAdaptable<Collection<Entity>> remainingNodesTask = DynamicTasks.queue(newCreateNodesTask(initialClusterSize - 1, "Creating remaining upgraded version nodes ("+(initialClusterSize - 1)+")")); //3. Once we have all nodes running without errors switch master DynamicTasks.queue(Effectors.invocation(cluster, BrooklynCluster.SELECT_MASTER, MutableMap.of(SelectMasterEffector.NEW_MASTER_ID, Iterables.getOnlyElement(initialNodeTask.asTask().getUnchecked()).getId()))).asTask().getUnchecked(); //4. Stop the nodes which were running at the start of the upgrade call, but keep them around. // Should we create a quarantine-like zone for old stopped version? // For members that were created meanwhile - they will be using the new version already. If the new version // isn't good then they will fail to start as well, forcing the policies to retry (and succeed once the // URL is reverted). //any other nodes created via other means should also be using the new spec, so initialMembers will be all the old version nodes DynamicTasks.queue(Effectors.invocation(BrooklynNode.STOP_NODE_BUT_LEAVE_APPS, Collections.emptyMap(), initialMembers)).asTask().getUnchecked(); } private TaskAdaptable<Collection<Entity>> newCreateNodesTask(int size, String name) { return Tasks.<Collection<Entity>>builder().displayName(name).body(new CreateNodesCallable(size)).build(); } protected class CreateNodesCallable implements Callable<Collection<Entity>> { private final int size; public CreateNodesCallable(int size) { this.size = size; } @Override public Collection<Entity> call() throws Exception { return createNodes(size); } } protected Collection<Entity> createNodes(int nodeCnt) { DynamicCluster cluster = (DynamicCluster)entity(); //1. Create the nodes Collection<Entity> newNodes = cluster.resizeByDelta(nodeCnt); //2. Wait for them to be RUNNING (or at least STARTING to have completed) // (should already be the case, because above is synchronous and, we think, it will fail if start does not succeed) DynamicTasks.queue(EntityTasks.requiringAttributeEventually(newNodes, Attributes.SERVICE_STATE_ACTUAL, Predicates.not(Predicates.equalTo(Lifecycle.STARTING)), Duration.minutes(30))); //3. Set HOT_STANDBY in case it is not enabled on the command line ... // TODO support via EntitySpec DynamicTasks.queue(Effectors.invocation( BrooklynNode.SET_HIGH_AVAILABILITY_MODE, MutableMap.of(SetHighAvailabilityModeEffector.MODE, HighAvailabilityMode.HOT_STANDBY), newNodes)).asTask().getUnchecked(); //... and wait until all of the nodes change state // TODO fail quicker if state changes to FAILED DynamicTasks.queue(EntityTasks.requiringAttributeEventually(newNodes, BrooklynNode.MANAGEMENT_NODE_STATE, Predicates.equalTo(ManagementNodeState.HOT_STANDBY), Duration.FIVE_MINUTES)); // TODO also check that the nodes created all report the original master, in case persistence changes it //5. Just in case check if all of the nodes are SERVICE_UP (which would rule out ON_FIRE as well) Collection<Entity> failedNodes = Collections2.filter(newNodes, EntityPredicates.attributeEqualTo(BrooklynNode.SERVICE_UP, Boolean.FALSE)); if (!failedNodes.isEmpty()) { throw new IllegalStateException("Nodes " + failedNodes + " are not " + BrooklynNode.SERVICE_UP + " though successfully in " + ManagementNodeState.HOT_STANDBY); } return newNodes; } }