/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.cloud; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.util.NamedList; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; import static org.apache.solr.common.params.CommonAdminParams.ASYNC; public class ReplaceNodeCmd implements OverseerCollectionMessageHandler.Cmd { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final OverseerCollectionMessageHandler ocmh; public ReplaceNodeCmd(OverseerCollectionMessageHandler ocmh) { this.ocmh = ocmh; } @Override public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception { ZkStateReader zkStateReader = ocmh.zkStateReader; ocmh.checkRequired(message, "source", "target"); String source = message.getStr("source"); String target = message.getStr("target"); String async = message.getStr("async"); boolean parallel = message.getBool("parallel", false); ClusterState clusterState = zkStateReader.getClusterState(); if (!clusterState.liveNodesContain(source)) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Source Node: " + source + " is not live"); } if (!clusterState.liveNodesContain(target)) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Target Node: " + target + " is not live"); } List<ZkNodeProps> sourceReplicas = getReplicasOfNode(source, clusterState); List<ZkNodeProps> createdReplicas = new ArrayList<>(); AtomicBoolean anyOneFailed = new AtomicBoolean(false); CountDownLatch countDownLatch = new CountDownLatch(sourceReplicas.size()); for (ZkNodeProps sourceReplica : sourceReplicas) { NamedList nl = new NamedList(); log.info("Going to create replica for collection={} shard={} on node={}", sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), target); ZkNodeProps msg = sourceReplica.plus("parallel", String.valueOf(parallel)).plus(CoreAdminParams.NODE, target); if(async!=null) msg.getProperties().put(ASYNC, async); final ZkNodeProps addedReplica = ocmh.addReplica(clusterState, msg, nl, () -> { countDownLatch.countDown(); if (nl.get("failure") != null) { String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" + " on node=%s", sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), target); log.warn(errorString); // one replica creation failed. Make the best attempt to // delete all the replicas created so far in the target // and exit synchronized (results) { results.add("failure", errorString); anyOneFailed.set(true); } } else { log.debug("Successfully created replica for collection={} shard={} on node={}", sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), target); } }); if (addedReplica != null) { createdReplicas.add(addedReplica); } } log.debug("Waiting for replace node action to complete"); countDownLatch.await(5, TimeUnit.MINUTES); log.debug("Finished waiting for replace node action to complete"); if (anyOneFailed.get()) { log.info("Failed to create some replicas. Cleaning up all replicas on target node"); CountDownLatch cleanupLatch = new CountDownLatch(createdReplicas.size()); for (ZkNodeProps createdReplica : createdReplicas) { NamedList deleteResult = new NamedList(); try { ocmh.deleteReplica(zkStateReader.getClusterState(), createdReplica.plus("parallel", "true"), deleteResult, () -> { cleanupLatch.countDown(); if (deleteResult.get("failure") != null) { synchronized (results) { results.add("failure", "Could not cleanup, because of : " + deleteResult.get("failure")); } } }); } catch (KeeperException e) { cleanupLatch.countDown(); log.warn("Error deleting replica ", e); } catch (Exception e) { log.warn("Error deleting replica ", e); cleanupLatch.countDown(); throw e; } } cleanupLatch.await(5, TimeUnit.MINUTES); } // we have reached this far means all replicas could be recreated //now cleanup the replicas in the source node DeleteNodeCmd.cleanupReplicas(results, state, sourceReplicas, ocmh, source, async); results.add("success", "REPLACENODE action completed successfully from : " + source + " to : " + target); } static List<ZkNodeProps> getReplicasOfNode(String source, ClusterState state) { List<ZkNodeProps> sourceReplicas = new ArrayList<>(); for (Map.Entry<String, DocCollection> e : state.getCollectionsMap().entrySet()) { for (Slice slice : e.getValue().getSlices()) { for (Replica replica : slice.getReplicas()) { if (source.equals(replica.getNodeName())) { ZkNodeProps props = new ZkNodeProps( COLLECTION_PROP, e.getKey(), SHARD_ID_PROP, slice.getName(), ZkStateReader.CORE_NAME_PROP, replica.getCoreName(), ZkStateReader.REPLICA_PROP, replica.getName(), CoreAdminParams.NODE, source); sourceReplicas.add(props ); } } } } return sourceReplicas; } }