/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.replication;
import org.apache.lucene.store.AlreadyClosedException;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.DocWriteResponse;
import org.elasticsearch.action.admin.indices.flush.FlushRequest;
import org.elasticsearch.action.bulk.BulkItemRequest;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkShardRequest;
import org.elasticsearch.action.bulk.BulkShardResponse;
import org.elasticsearch.action.bulk.TransportShardBulkAction;
import org.elasticsearch.action.bulk.TransportShardBulkActionTests;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.action.support.replication.ReplicationOperation;
import org.elasticsearch.action.support.replication.ReplicationRequest;
import org.elasticsearch.action.support.replication.ReplicationResponse;
import org.elasticsearch.action.support.replication.TransportReplicationAction.ReplicaResponse;
import org.elasticsearch.action.support.replication.TransportWriteAction;
import org.elasticsearch.action.support.replication.TransportWriteActionTestHelper;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.RecoverySource;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingHelper;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.TestShardRouting;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.engine.EngineFactory;
import org.elasticsearch.index.seqno.GlobalCheckpointSyncAction;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.IndexShardTestCase;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.shard.ShardPath;
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.indices.recovery.RecoveryTarget;
import org.elasticsearch.transport.TransportRequest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
public abstract class ESIndexLevelReplicationTestCase extends IndexShardTestCase {
protected final Index index = new Index("test", "uuid");
private final ShardId shardId = new ShardId(index, 0);
private final Map<String, String> indexMapping = Collections.singletonMap("type", "{ \"type\": {} }");
protected ReplicationGroup createGroup(int replicas) throws IOException {
IndexMetaData metaData = buildIndexMetaData(replicas);
return new ReplicationGroup(metaData);
}
protected IndexMetaData buildIndexMetaData(int replicas) throws IOException {
return buildIndexMetaData(replicas, indexMapping);
}
protected IndexMetaData buildIndexMetaData(int replicas, Map<String, String> mappings) throws IOException {
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, replicas)
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.build();
IndexMetaData.Builder metaData = IndexMetaData.builder(index.getName())
.settings(settings)
.primaryTerm(0, 1);
for (Map.Entry<String, String> typeMapping : mappings.entrySet()) {
metaData.putMapping(typeMapping.getKey(), typeMapping.getValue());
}
return metaData.build();
}
protected DiscoveryNode getDiscoveryNode(String id) {
return new DiscoveryNode(id, id, buildNewFakeTransportAddress(), Collections.emptyMap(),
Collections.singleton(DiscoveryNode.Role.DATA), Version.CURRENT);
}
protected class ReplicationGroup implements AutoCloseable, Iterable<IndexShard> {
private IndexShard primary;
private IndexMetaData indexMetaData;
private final List<IndexShard> replicas;
private final AtomicInteger replicaId = new AtomicInteger();
private final AtomicInteger docId = new AtomicInteger();
boolean closed = false;
ReplicationGroup(final IndexMetaData indexMetaData) throws IOException {
final ShardRouting primaryRouting = this.createShardRouting("s0", true);
primary = newShard(primaryRouting, indexMetaData, null, getEngineFactory(primaryRouting));
replicas = new ArrayList<>();
this.indexMetaData = indexMetaData;
updateAllocationIDsOnPrimary();
for (int i = 0; i < indexMetaData.getNumberOfReplicas(); i++) {
addReplica();
}
}
private ShardRouting createShardRouting(String nodeId, boolean primary) {
return TestShardRouting.newShardRouting(shardId, nodeId, primary, ShardRoutingState.INITIALIZING,
primary ? RecoverySource.StoreRecoverySource.EMPTY_STORE_INSTANCE : RecoverySource.PeerRecoverySource.INSTANCE);
}
protected EngineFactory getEngineFactory(ShardRouting routing) {
return null;
}
public int indexDocs(final int numOfDoc) throws Exception {
for (int doc = 0; doc < numOfDoc; doc++) {
final IndexRequest indexRequest = new IndexRequest(index.getName(), "type", Integer.toString(docId.incrementAndGet()))
.source("{}", XContentType.JSON);
final BulkItemResponse response = index(indexRequest);
if (response.isFailed()) {
throw response.getFailure().getCause();
} else {
assertEquals(DocWriteResponse.Result.CREATED, response.getResponse().getResult());
}
}
return numOfDoc;
}
public int appendDocs(final int numOfDoc) throws Exception {
for (int doc = 0; doc < numOfDoc; doc++) {
final IndexRequest indexRequest = new IndexRequest(index.getName(), "type").source("{}", XContentType.JSON);
final BulkItemResponse response = index(indexRequest);
if (response.isFailed()) {
throw response.getFailure().getCause();
} else if (response.isFailed() == false) {
assertEquals(DocWriteResponse.Result.CREATED, response.getResponse().getResult());
}
}
return numOfDoc;
}
public BulkItemResponse index(IndexRequest indexRequest) throws Exception {
PlainActionFuture<BulkItemResponse> listener = new PlainActionFuture<>();
final ActionListener<BulkShardResponse> wrapBulkListener = ActionListener.wrap(
bulkShardResponse -> listener.onResponse(bulkShardResponse.getResponses()[0]),
listener::onFailure);
BulkItemRequest[] items = new BulkItemRequest[1];
items[0] = new BulkItemRequest(0, indexRequest);
BulkShardRequest request = new BulkShardRequest(shardId, indexRequest.getRefreshPolicy(), items);
new IndexingAction(request, wrapBulkListener, this).execute();
return listener.get();
}
public synchronized void startAll() throws IOException {
startReplicas(replicas.size());
}
public synchronized int startReplicas(int numOfReplicasToStart) throws IOException {
if (primary.routingEntry().initializing()) {
startPrimary();
}
int started = 0;
for (IndexShard replicaShard : replicas) {
if (replicaShard.routingEntry().initializing()) {
recoverReplica(replicaShard);
started++;
if (started > numOfReplicasToStart) {
break;
}
}
}
return started;
}
public void startPrimary() throws IOException {
final DiscoveryNode pNode = getDiscoveryNode(primary.routingEntry().currentNodeId());
primary.markAsRecovering("store", new RecoveryState(primary.routingEntry(), pNode, null));
primary.recoverFromStore();
primary.updateRoutingEntry(ShardRoutingHelper.moveToStarted(primary.routingEntry()));
updateAllocationIDsOnPrimary();
for (final IndexShard replica : replicas) {
recoverReplica(replica);
}
}
public IndexShard addReplica() throws IOException {
final ShardRouting replicaRouting = createShardRouting("s" + replicaId.incrementAndGet(), false);
final IndexShard replica =
newShard(replicaRouting, indexMetaData, null, getEngineFactory(replicaRouting));
addReplica(replica);
return replica;
}
public synchronized void addReplica(IndexShard replica) {
assert shardRoutings().stream()
.filter(shardRouting -> shardRouting.isSameAllocation(replica.routingEntry())).findFirst().isPresent() == false :
"replica with aId [" + replica.routingEntry().allocationId() + "] already exists";
replica.updatePrimaryTerm(primary.getPrimaryTerm());
replicas.add(replica);
updateAllocationIDsOnPrimary();
}
public synchronized IndexShard addReplicaWithExistingPath(final ShardPath shardPath, final String nodeId) throws IOException {
final ShardRouting shardRouting = TestShardRouting.newShardRouting(
shardId,
nodeId,
false, ShardRoutingState.INITIALIZING,
RecoverySource.PeerRecoverySource.INSTANCE);
final IndexShard newReplica = newShard(shardRouting, shardPath, indexMetaData, null,
getEngineFactory(shardRouting));
replicas.add(newReplica);
updateAllocationIDsOnPrimary();
return newReplica;
}
public synchronized List<IndexShard> getReplicas() {
return Collections.unmodifiableList(replicas);
}
/**
* promotes the specific replica as the new primary
*/
public synchronized void promoteReplicaToPrimary(IndexShard replica) throws IOException {
final long newTerm = indexMetaData.primaryTerm(shardId.id()) + 1;
IndexMetaData.Builder newMetaData =
IndexMetaData.builder(indexMetaData).primaryTerm(shardId.id(), newTerm);
indexMetaData = newMetaData.build();
for (IndexShard shard: replicas) {
shard.updatePrimaryTerm(newTerm);
}
boolean found = replicas.remove(replica);
assert found;
closeShards(primary);
primary = replica;
replica.updateRoutingEntry(replica.routingEntry().moveActiveReplicaToPrimary());
updateAllocationIDsOnPrimary();
}
synchronized boolean removeReplica(IndexShard replica) {
final boolean removed = replicas.remove(replica);
if (removed) {
updateAllocationIDsOnPrimary();
}
return removed;
}
public void recoverReplica(IndexShard replica) throws IOException {
recoverReplica(replica, (r, sourceNode) -> new RecoveryTarget(r, sourceNode, recoveryListener, version -> {}));
}
public void recoverReplica(IndexShard replica, BiFunction<IndexShard, DiscoveryNode, RecoveryTarget> targetSupplier)
throws IOException {
recoverReplica(replica, targetSupplier, true);
}
public void recoverReplica(
IndexShard replica,
BiFunction<IndexShard, DiscoveryNode, RecoveryTarget> targetSupplier,
boolean markAsRecovering) throws IOException {
ESIndexLevelReplicationTestCase.this.recoverReplica(replica, primary, targetSupplier, markAsRecovering);
updateAllocationIDsOnPrimary();
}
public synchronized DiscoveryNode getPrimaryNode() {
return getDiscoveryNode(primary.routingEntry().currentNodeId());
}
public Future<Void> asyncRecoverReplica(
final IndexShard replica, final BiFunction<IndexShard, DiscoveryNode, RecoveryTarget> targetSupplier) throws IOException {
final FutureTask<Void> task = new FutureTask<>(() -> {
recoverReplica(replica, targetSupplier);
return null;
});
threadPool.generic().execute(task);
return task;
}
public synchronized void assertAllEqual(int expectedCount) throws IOException {
Set<String> primaryIds = getShardDocUIDs(primary);
assertThat(primaryIds.size(), equalTo(expectedCount));
for (IndexShard replica : replicas) {
Set<String> replicaIds = getShardDocUIDs(replica);
Set<String> temp = new HashSet<>(primaryIds);
temp.removeAll(replicaIds);
assertThat(replica.routingEntry() + " is missing docs", temp, empty());
temp = new HashSet<>(replicaIds);
temp.removeAll(primaryIds);
assertThat(replica.routingEntry() + " has extra docs", temp, empty());
}
}
public synchronized void refresh(String source) {
for (IndexShard shard : this) {
shard.refresh(source);
}
}
public synchronized void flush() {
final FlushRequest request = new FlushRequest();
for (IndexShard shard : this) {
shard.flush(request);
}
}
public synchronized List<ShardRouting> shardRoutings() {
return StreamSupport.stream(this.spliterator(), false).map(IndexShard::routingEntry).collect(Collectors.toList());
}
@Override
public synchronized void close() throws Exception {
if (closed == false) {
closed = true;
closeShards(this);
} else {
throw new AlreadyClosedException("too bad");
}
}
@Override
public Iterator<IndexShard> iterator() {
return Iterators.concat(replicas.iterator(), Collections.singleton(primary).iterator());
}
public IndexShard getPrimary() {
return primary;
}
public void syncGlobalCheckpoint() {
PlainActionFuture<ReplicationResponse> listener = new PlainActionFuture<>();
try {
new GlobalCheckpointSync(listener, this).execute();
listener.get();
} catch (Exception e) {
throw new AssertionError(e);
}
}
private void updateAllocationIDsOnPrimary() {
Set<String> active = new HashSet<>();
Set<String> initializing = new HashSet<>();
for (ShardRouting shard: shardRoutings()) {
if (shard.active()) {
active.add(shard.allocationId().getId());
} else {
initializing.add(shard.allocationId().getId());
}
}
primary.updateAllocationIdsFromMaster(active, initializing);
}
}
abstract class ReplicationAction<Request extends ReplicationRequest<Request>,
ReplicaRequest extends ReplicationRequest<ReplicaRequest>,
Response extends ReplicationResponse> {
private final Request request;
private ActionListener<Response> listener;
private final ReplicationGroup replicationGroup;
private final String opType;
ReplicationAction(Request request, ActionListener<Response> listener, ReplicationGroup group, String opType) {
this.request = request;
this.listener = listener;
this.replicationGroup = group;
this.opType = opType;
}
public void execute() throws Exception {
new ReplicationOperation<Request, ReplicaRequest, PrimaryResult>(request, new PrimaryRef(),
new ActionListener<PrimaryResult>() {
@Override
public void onResponse(PrimaryResult result) {
result.respond(listener);
}
@Override
public void onFailure(Exception e) {
listener.onFailure(e);
}
}, new ReplicasRef(), () -> null, logger, opType) {
@Override
protected List<ShardRouting> getShards(ShardId shardId, ClusterState state) {
return replicationGroup.shardRoutings();
}
@Override
protected String checkActiveShardCount() {
return null;
}
@Override
protected Set<String> getInSyncAllocationIds(ShardId shardId, ClusterState clusterState) {
return replicationGroup.shardRoutings().stream().filter(ShardRouting::active).map(r -> r.allocationId().getId())
.collect(Collectors.toSet());
}
}.execute();
}
protected abstract PrimaryResult performOnPrimary(IndexShard primary, Request request) throws Exception;
protected abstract void performOnReplica(ReplicaRequest request, IndexShard replica) throws Exception;
class PrimaryRef implements ReplicationOperation.Primary<Request, ReplicaRequest, PrimaryResult> {
@Override
public ShardRouting routingEntry() {
return replicationGroup.primary.routingEntry();
}
@Override
public void failShard(String message, Exception exception) {
throw new UnsupportedOperationException();
}
@Override
public PrimaryResult perform(Request request) throws Exception {
PrimaryResult response = performOnPrimary(replicationGroup.primary, request);
response.replicaRequest().primaryTerm(replicationGroup.primary.getPrimaryTerm());
return response;
}
@Override
public void updateLocalCheckpointForShard(String allocationId, long checkpoint) {
replicationGroup.getPrimary().updateLocalCheckpointForShard(allocationId, checkpoint);
}
@Override
public long localCheckpoint() {
return replicationGroup.getPrimary().getLocalCheckpoint();
}
@Override
public long globalCheckpoint() {
return replicationGroup.getPrimary().getGlobalCheckpoint();
}
}
class ReplicasRef implements ReplicationOperation.Replicas<ReplicaRequest> {
@Override
public void performOn(
final ShardRouting replicaRouting,
final ReplicaRequest request,
final long globalCheckpoint,
final ActionListener<ReplicationOperation.ReplicaResponse> listener) {
try {
IndexShard replica = replicationGroup.replicas.stream()
.filter(s -> replicaRouting.isSameAllocation(s.routingEntry())).findFirst().get();
replica.updateGlobalCheckpointOnReplica(globalCheckpoint);
performOnReplica(request, replica);
listener.onResponse(new ReplicaResponse(replica.routingEntry().allocationId().getId(), replica.getLocalCheckpoint()));
} catch (Exception e) {
listener.onFailure(e);
}
}
@Override
public void failShardIfNeeded(ShardRouting replica, long primaryTerm, String message, Exception exception,
Runnable onSuccess, Consumer<Exception> onPrimaryDemoted,
Consumer<Exception> onIgnoredFailure) {
throw new UnsupportedOperationException();
}
@Override
public void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, long primaryTerm, Runnable onSuccess,
Consumer<Exception> onPrimaryDemoted, Consumer<Exception> onIgnoredFailure) {
throw new UnsupportedOperationException();
}
}
class PrimaryResult implements ReplicationOperation.PrimaryResult<ReplicaRequest> {
final ReplicaRequest replicaRequest;
final Response finalResponse;
PrimaryResult(ReplicaRequest replicaRequest, Response finalResponse) {
this.replicaRequest = replicaRequest;
this.finalResponse = finalResponse;
}
@Override
public ReplicaRequest replicaRequest() {
return replicaRequest;
}
@Override
public void setShardInfo(ReplicationResponse.ShardInfo shardInfo) {
finalResponse.setShardInfo(shardInfo);
}
public void respond(ActionListener<Response> listener) {
listener.onResponse(finalResponse);
}
}
}
class IndexingAction extends ReplicationAction<BulkShardRequest, BulkShardRequest, BulkShardResponse> {
IndexingAction(BulkShardRequest request, ActionListener<BulkShardResponse> listener, ReplicationGroup replicationGroup) {
super(request, listener, replicationGroup, "indexing");
}
@Override
protected PrimaryResult performOnPrimary(IndexShard primary, BulkShardRequest request) throws Exception {
final TransportWriteAction.WritePrimaryResult<BulkShardRequest, BulkShardResponse> result = executeShardBulkOnPrimary(primary, request);
return new PrimaryResult(result.replicaRequest(), result.finalResponseIfSuccessful);
}
@Override
protected void performOnReplica(BulkShardRequest request, IndexShard replica) throws Exception {
executeShardBulkOnReplica(replica, request);
}
}
private TransportWriteAction.WritePrimaryResult<BulkShardRequest, BulkShardResponse> executeShardBulkOnPrimary(IndexShard primary, BulkShardRequest request) throws Exception {
for (BulkItemRequest itemRequest : request.items()) {
if (itemRequest.request() instanceof IndexRequest) {
((IndexRequest) itemRequest.request()).process(null, index.getName());
}
}
final TransportWriteAction.WritePrimaryResult<BulkShardRequest, BulkShardResponse> result =
TransportShardBulkAction.performOnPrimary(request, primary, null,
System::currentTimeMillis, new TransportShardBulkActionTests.NoopMappingUpdatePerformer());
request.primaryTerm(primary.getPrimaryTerm());
TransportWriteActionTestHelper.performPostWriteActions(primary, request, result.location, logger);
return result;
}
private void executeShardBulkOnReplica(IndexShard replica, BulkShardRequest request) throws Exception {
final Translog.Location location = TransportShardBulkAction.performOnReplica(request, replica);
TransportWriteActionTestHelper.performPostWriteActions(replica, request, location, logger);
}
/**
* indexes the given requests on the supplied primary, modifying it for replicas
*/
BulkShardRequest indexOnPrimary(IndexRequest request, IndexShard primary) throws Exception {
final BulkItemRequest bulkItemRequest = new BulkItemRequest(0, request);
BulkItemRequest[] bulkItemRequests = new BulkItemRequest[1];
bulkItemRequests[0] = bulkItemRequest;
final BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, request.getRefreshPolicy(), bulkItemRequests);
final TransportWriteAction.WritePrimaryResult<BulkShardRequest, BulkShardResponse> result =
executeShardBulkOnPrimary(primary, bulkShardRequest);
return result.replicaRequest();
}
/**
* indexes the given requests on the supplied replica shard
*/
void indexOnReplica(BulkShardRequest request, IndexShard replica) throws Exception {
executeShardBulkOnReplica(replica, request);
}
class GlobalCheckpointSync extends ReplicationAction<
GlobalCheckpointSyncAction.Request,
GlobalCheckpointSyncAction.Request,
ReplicationResponse> {
GlobalCheckpointSync(final ActionListener<ReplicationResponse> listener, final ReplicationGroup replicationGroup) {
super(
new GlobalCheckpointSyncAction.Request(replicationGroup.getPrimary().shardId()),
listener,
replicationGroup,
"global_checkpoint_sync");
}
@Override
protected PrimaryResult performOnPrimary(
final IndexShard primary, final GlobalCheckpointSyncAction.Request request) throws Exception {
primary.getTranslog().sync();
return new PrimaryResult(request, new ReplicationResponse());
}
@Override
protected void performOnReplica(final GlobalCheckpointSyncAction.Request request, final IndexShard replica) throws IOException {
replica.getTranslog().sync();
}
}
}