/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.recovery;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lease.Releasable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.shard.IndexEventListener;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportChannel;
import org.elasticsearch.transport.TransportRequestHandler;
import org.elasticsearch.transport.TransportService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Supplier;
/**
* The source recovery accepts recovery requests from other peer shards and start the recovery process from this
* source shard to the target shard.
*/
public class PeerRecoverySourceService extends AbstractComponent implements IndexEventListener {
public static class Actions {
public static final String START_RECOVERY = "internal:index/shard/recovery/start_recovery";
}
private final TransportService transportService;
private final IndicesService indicesService;
private final RecoverySettings recoverySettings;
private final ClusterService clusterService;
private final OngoingRecoveries ongoingRecoveries = new OngoingRecoveries();
@Inject
public PeerRecoverySourceService(Settings settings, TransportService transportService, IndicesService indicesService,
RecoverySettings recoverySettings, ClusterService clusterService) {
super(settings);
this.transportService = transportService;
this.indicesService = indicesService;
this.clusterService = clusterService;
this.recoverySettings = recoverySettings;
transportService.registerRequestHandler(Actions.START_RECOVERY, StartRecoveryRequest::new, ThreadPool.Names.GENERIC, new StartRecoveryTransportRequestHandler());
}
@Override
public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard,
Settings indexSettings) {
if (indexShard != null) {
ongoingRecoveries.cancel(indexShard, "shard is closed");
}
}
private RecoveryResponse recover(final StartRecoveryRequest request) throws IOException {
final IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
final IndexShard shard = indexService.getShard(request.shardId().id());
// starting recovery from that our (the source) shard state is marking the shard to be in recovery mode as well, otherwise
// the index operations will not be routed to it properly
RoutingNode node = clusterService.state().getRoutingNodes().node(request.targetNode().getId());
if (node == null) {
logger.debug("delaying recovery of {} as source node {} is unknown", request.shardId(), request.targetNode());
throw new DelayRecoveryException("source node does not have the node [" + request.targetNode() + "] in its state yet..");
}
ShardRouting routingEntry = shard.routingEntry();
if (request.isPrimaryRelocation() && (routingEntry.relocating() == false || routingEntry.relocatingNodeId().equals(request.targetNode().getId()) == false)) {
logger.debug("delaying recovery of {} as source shard is not marked yet as relocating to {}", request.shardId(), request.targetNode());
throw new DelayRecoveryException("source shard is not marked yet as relocating to [" + request.targetNode() + "]");
}
ShardRouting targetShardRouting = node.getByShardId(request.shardId());
if (targetShardRouting == null) {
logger.debug("delaying recovery of {} as it is not listed as assigned to target node {}", request.shardId(), request.targetNode());
throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node");
}
if (!targetShardRouting.initializing()) {
logger.debug("delaying recovery of {} as it is not listed as initializing on the target node {}. known shards state is [{}]",
request.shardId(), request.targetNode(), targetShardRouting.state());
throw new DelayRecoveryException("source node has the state of the target shard to be [" + targetShardRouting.state() + "], expecting to be [initializing]");
}
if (request.targetAllocationId().equals(targetShardRouting.allocationId().getId()) == false) {
logger.debug("delaying recovery of {} due to target allocation id mismatch (expected: [{}], but was: [{}])",
request.shardId(), request.targetAllocationId(), targetShardRouting.allocationId().getId());
throw new DelayRecoveryException("source node has the state of the target shard to have allocation id [" +
targetShardRouting.allocationId().getId() + "], expecting to be [" + request.targetAllocationId() + "]");
}
RecoverySourceHandler handler = ongoingRecoveries.addNewRecovery(request, shard);
logger.trace("[{}][{}] starting recovery to {}", request.shardId().getIndex().getName(), request.shardId().id(), request.targetNode());
try {
return handler.recoverToTarget();
} finally {
ongoingRecoveries.remove(shard, handler);
}
}
class StartRecoveryTransportRequestHandler implements TransportRequestHandler<StartRecoveryRequest> {
@Override
public void messageReceived(final StartRecoveryRequest request, final TransportChannel channel) throws Exception {
RecoveryResponse response = recover(request);
channel.sendResponse(response);
}
}
private final class OngoingRecoveries {
private final Map<IndexShard, ShardRecoveryContext> ongoingRecoveries = new HashMap<>();
synchronized RecoverySourceHandler addNewRecovery(StartRecoveryRequest request, IndexShard shard) {
final ShardRecoveryContext shardContext = ongoingRecoveries.computeIfAbsent(shard, s -> new ShardRecoveryContext());
RecoverySourceHandler handler = shardContext.addNewRecovery(request, shard);
shard.recoveryStats().incCurrentAsSource();
return handler;
}
synchronized void remove(IndexShard shard, RecoverySourceHandler handler) {
final ShardRecoveryContext shardRecoveryContext = ongoingRecoveries.get(shard);
assert shardRecoveryContext != null : "Shard was not registered [" + shard + "]";
boolean remove = shardRecoveryContext.recoveryHandlers.remove(handler);
assert remove : "Handler was not registered [" + handler + "]";
if (remove) {
shard.recoveryStats().decCurrentAsSource();
}
if (shardRecoveryContext.recoveryHandlers.isEmpty()) {
ongoingRecoveries.remove(shard);
assert shardRecoveryContext.onNewRecoveryException == null;
}
}
synchronized void cancel(IndexShard shard, String reason) {
final ShardRecoveryContext shardRecoveryContext = ongoingRecoveries.get(shard);
if (shardRecoveryContext != null) {
final List<Exception> failures = new ArrayList<>();
for (RecoverySourceHandler handlers : shardRecoveryContext.recoveryHandlers) {
try {
handlers.cancel(reason);
} catch (Exception ex) {
failures.add(ex);
} finally {
shard.recoveryStats().decCurrentAsSource();
}
}
ExceptionsHelper.maybeThrowRuntimeAndSuppress(failures);
}
}
private final class ShardRecoveryContext {
final Set<RecoverySourceHandler> recoveryHandlers = new HashSet<>();
@Nullable
private DelayRecoveryException onNewRecoveryException;
/**
* Adds recovery source handler if recoveries are not delayed from starting (see also {@link #delayNewRecoveries}.
* Throws {@link DelayRecoveryException} if new recoveries are delayed from starting.
*/
synchronized RecoverySourceHandler addNewRecovery(StartRecoveryRequest request, IndexShard shard) {
if (onNewRecoveryException != null) {
throw onNewRecoveryException;
}
RecoverySourceHandler handler = createRecoverySourceHandler(request, shard);
recoveryHandlers.add(handler);
return handler;
}
private RecoverySourceHandler createRecoverySourceHandler(StartRecoveryRequest request, IndexShard shard) {
RecoverySourceHandler handler;
final RemoteRecoveryTargetHandler recoveryTarget =
new RemoteRecoveryTargetHandler(request.recoveryId(), request.shardId(), transportService,
request.targetNode(), recoverySettings, throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime));
Supplier<Long> currentClusterStateVersionSupplier = () -> clusterService.state().getVersion();
handler = new RecoverySourceHandler(shard, recoveryTarget, request, currentClusterStateVersionSupplier,
this::delayNewRecoveries, recoverySettings.getChunkSize().bytesAsInt(), settings);
return handler;
}
/**
* Makes new recoveries throw a {@link DelayRecoveryException} with the provided message.
*
* Throws {@link IllegalStateException} if new recoveries are already being delayed.
*/
synchronized Releasable delayNewRecoveries(String exceptionMessage) throws IllegalStateException {
if (onNewRecoveryException != null) {
throw new IllegalStateException("already delaying recoveries");
}
onNewRecoveryException = new DelayRecoveryException(exceptionMessage);
return this::unblockNewRecoveries;
}
private synchronized void unblockNewRecoveries() {
onNewRecoveryException = null;
}
}
}
}