/*
* Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.spi.impl.operationservice.impl;
import com.hazelcast.instance.Node;
import com.hazelcast.internal.partition.InternalPartition;
import com.hazelcast.internal.partition.InternalPartitionService;
import com.hazelcast.internal.partition.PartitionReplicaVersionManager;
import com.hazelcast.nio.Address;
import com.hazelcast.nio.serialization.Data;
import com.hazelcast.spi.BackupAwareOperation;
import com.hazelcast.spi.FragmentedMigrationAwareService;
import com.hazelcast.spi.Operation;
import com.hazelcast.spi.ServiceNamespaceAware;
import com.hazelcast.spi.ServiceNamespace;
import com.hazelcast.spi.impl.NodeEngineImpl;
import com.hazelcast.spi.impl.operationservice.impl.operations.Backup;
import static com.hazelcast.internal.partition.InternalPartition.MAX_BACKUP_COUNT;
import static com.hazelcast.spi.OperationAccessor.hasActiveInvocation;
import static com.hazelcast.spi.OperationAccessor.setCallId;
import static java.lang.Math.min;
/**
* Responsible for creating and sending the backups of an operation.
*/
final class OperationBackupHandler {
// Evaluated once when this class is initialized; used by getBackupOperation to decide whether
// the assert-based service namespace checks are executed at all.
private static final boolean ASSERTION_ENABLED = OperationBackupHandler.class.desiredAssertionStatus();
// Gives access to the partition service and to the address of this member.
private final Node node;
// Gives access to the serialization service used to serialize a backup operation to Data.
private final NodeEngineImpl nodeEngine;
// Asked whether the backups of an operation must be forced to be sync.
private final BackpressureRegulator backpressureRegulator;
// Used to send each created Backup to its target address.
private final OutboundOperationHandler outboundOperationHandler;
/**
 * Creates a handler that takes its collaborators from the given operation service.
 *
 * @param operationService the service supplying the node, the node engine and the backpressure regulator.
 * @param outboundOperationHandler the handler used to send the created backups.
 */
OperationBackupHandler(OperationServiceImpl operationService, OutboundOperationHandler outboundOperationHandler) {
    this.node = operationService.node;
    this.nodeEngine = operationService.nodeEngine;
    this.backpressureRegulator = operationService.backpressureRegulator;
    this.outboundOperationHandler = outboundOperationHandler;
}
/**
 * Sends the appropriate backups for the given operation. This call does not wait until the backups
 * have been ACK'ed.
 *
 * If the operation is not a {@link BackupAwareOperation}, or if it reports that it should not be
 * backed up, nothing is sent and 0 is returned.
 *
 * @param op the Operation to backup.
 * @return the number of ACKS required to complete the invocation.
 * @throws Exception if there is any exception sending the backups.
 */
int sendBackups(Operation op) throws Exception {
    if (op instanceof BackupAwareOperation) {
        BackupAwareOperation candidate = (BackupAwareOperation) op;
        return candidate.shouldBackup() ? sendBackups0(candidate) : 0;
    }
    return 0;
}
/**
 * Validates the backup counts requested by the operation, increments the partition replica versions
 * for the requested total and sends the resulting sync and async backups.
 *
 * @param backupAwareOp the operation to backup; it is also used as an {@link Operation}.
 * @return the number of sync backups that were sent.
 * @throws IllegalArgumentException if a requested backup count is out of range.
 * @throws Exception if there is any exception sending the backups.
 */
int sendBackups0(BackupAwareOperation backupAwareOp) throws Exception {
    // each of these helpers throws when the count it reads is out of range
    int wantedSync = requestedSyncBackups(backupAwareOp);
    int wantedAsync = requestedAsyncBackups(backupAwareOp);
    int wantedTotal = requestedTotalBackups(backupAwareOp);
    if (wantedTotal == 0) {
        return 0;
    }

    Operation op = (Operation) backupAwareOp;
    PartitionReplicaVersionManager versionManager = node.getPartitionService().getPartitionReplicaVersionManager();
    ServiceNamespace namespace = versionManager.getServiceNamespace(op);
    // versions are incremented for the requested total, before the effective counts are computed
    long[] replicaVersions
            = versionManager.incrementPartitionReplicaVersions(op.getPartitionId(), namespace, wantedTotal);

    boolean forceSync = backpressureRegulator.isSyncForced(backupAwareOp);
    int syncCount = syncBackups(wantedSync, wantedAsync, forceSync);
    int asyncCount = asyncBackups(wantedSync, wantedAsync, forceSync);

    // TODO: This could cause a problem with back pressure
    if (!op.returnsResponse()) {
        // the operation returns no response, so every backup is treated as async
        asyncCount += syncCount;
        syncCount = 0;
    }

    return syncCount + asyncCount == 0
            ? 0
            : makeBackups(backupAwareOp, op.getPartitionId(), replicaVersions, syncCount, asyncCount);
}
/**
 * Calculates how many sync backups should be made.
 *
 * @param requestedSyncBackups the sync backup count requested by the operation.
 * @param requestedAsyncBackups the async backup count requested by the operation.
 * @param syncForced if true, the requested async backups are counted as sync backups as well.
 * @return the requested sync count (plus the async count when forced), capped at the maximum backup
 * count allowed by the partition service.
 */
int syncBackups(int requestedSyncBackups, int requestedAsyncBackups, boolean syncForced) {
    // if force sync enabled, then the sum of the backups
    int wanted = syncForced ? requestedSyncBackups + requestedAsyncBackups : requestedSyncBackups;
    int allowed = node.getPartitionService().getMaxAllowedBackupCount();
    return min(allowed, wanted);
}
/**
 * Calculates how many async backups should be made.
 *
 * @param requestedSyncBackups the sync backup count requested by the operation.
 * @param requestedAsyncBackups the async backup count requested by the operation.
 * @param syncForced if true, there are no async backups because they are forced to become sync.
 * @return the number of async backups to make; never negative and never more than what is left of
 * the maximum backup count allowed by the partition service after the requested sync backups.
 */
int asyncBackups(int requestedSyncBackups, int requestedAsyncBackups, boolean syncForced) {
    if (syncForced || requestedAsyncBackups == 0) {
        // if syncForced, then there will never be any async backups (they are forced to become sync)
        // if there are no asyncBackups then we are also done.
        return 0;
    }
    InternalPartitionService partitionService = node.getPartitionService();
    int maxBackupCount = partitionService.getMaxAllowedBackupCount();
    // The requested sync backups alone can exceed the allowed maximum, which makes the remainder
    // negative. A negative async count would cancel out the sync count in the caller's
    // "syncBackups + asyncBackups" total, so it is clamped to zero.
    int remainingCapacity = maxBackupCount - requestedSyncBackups;
    return Math.max(0, min(remainingCapacity, requestedAsyncBackups));
}
/**
 * Reads the sync backup count of the operation and verifies that it lies in [0, MAX_BACKUP_COUNT].
 *
 * @param op the operation to read the sync backup count from.
 * @return the validated sync backup count.
 * @throws IllegalArgumentException if the count is negative or larger than MAX_BACKUP_COUNT.
 */
private int requestedSyncBackups(BackupAwareOperation op) {
    int count = op.getSyncBackupCount();
    if (count >= 0 && count <= MAX_BACKUP_COUNT) {
        return count;
    }
    String problem = count < 0
            ? ", sync backup count can't be smaller than 0, but found: " + count
            : ", sync backup count can't be larger than " + MAX_BACKUP_COUNT + ", but found: " + count;
    throw new IllegalArgumentException("Can't create backup for " + op + problem);
}
/**
 * Reads the async backup count of the operation and verifies that it lies in [0, MAX_BACKUP_COUNT].
 *
 * @param op the operation to read the async backup count from.
 * @return the validated async backup count.
 * @throws IllegalArgumentException if the count is negative or larger than MAX_BACKUP_COUNT.
 */
private int requestedAsyncBackups(BackupAwareOperation op) {
    int count = op.getAsyncBackupCount();
    if (count >= 0 && count <= MAX_BACKUP_COUNT) {
        return count;
    }
    String problem = count < 0
            ? ", async backup count can't be smaller than 0, but found: " + count
            : ", async backup count can't be larger than " + MAX_BACKUP_COUNT + ", but found: " + count;
    throw new IllegalArgumentException("Can't create backup for " + op + problem);
}
/**
 * Sums the sync and async backup counts of the operation and verifies that the sum does not exceed
 * MAX_BACKUP_COUNT.
 *
 * @param op the operation to read the backup counts from.
 * @return the sum of the sync and async backup counts.
 * @throws IllegalArgumentException if the sum is larger than MAX_BACKUP_COUNT.
 */
private int requestedTotalBackups(BackupAwareOperation op) {
    int total = op.getSyncBackupCount() + op.getAsyncBackupCount();
    if (total <= MAX_BACKUP_COUNT) {
        return total;
    }
    throw new IllegalArgumentException("Can't create backup for " + op
            + ", the sum of async and sync backups is larger than " + MAX_BACKUP_COUNT
            + ", sync backup count is " + op.getSyncBackupCount()
            + ", async backup count is " + op.getAsyncBackupCount());
}
/**
 * Looks up the partition and sends the backups, using the single-backup path when exactly one
 * backup has to be made and the multiple-backup path otherwise.
 *
 * @param backupAwareOp the operation to backup.
 * @param partitionId the ID of the partition the backups belong to.
 * @param replicaVersions the replica versions to pass along with each backup.
 * @param syncBackups the number of sync backups to make.
 * @param asyncBackups the number of async backups to make.
 * @return the number of sync backups that were sent.
 */
private int makeBackups(BackupAwareOperation backupAwareOp, int partitionId, long[] replicaVersions,
                        int syncBackups, int asyncBackups) {
    InternalPartition partition = node.getPartitionService().getPartition(partitionId);
    int total = syncBackups + asyncBackups;
    if (total == 1) {
        return sendSingleBackup(backupAwareOp, partition, replicaVersions, syncBackups);
    }
    return sendMultipleBackups(backupAwareOp, partition, replicaVersions, syncBackups, total);
}
/**
 * Sends the one and only backup to the address of replica index 1, if that replica has an address.
 *
 * @param backupAwareOp the operation to backup.
 * @param partition the partition to look up the replica address in.
 * @param replicaVersions the replica versions to pass along with the backup.
 * @param syncBackups the number of sync backups; the backup is sync when this equals 1.
 * @return 1 if a sync backup was sent, 0 if the backup was async or no target address was available.
 */
private int sendSingleBackup(BackupAwareOperation backupAwareOp, InternalPartition partition,
                             long[] replicaVersions, int syncBackups) {
    // Since there is only one replica, replica index is `1`
    Address target = partition.getReplicaAddress(1);
    if (target == null) {
        return 0;
    }

    // With a single backup the backup operation goes to exactly one node, so it is handed to `Backup`
    // as an object instead of being converted to `Data` first. Converting it would mean:
    //  - allocating a temporary byte[] for the `Data`
    //  - serializing the backup operation into that temporary memory
    //  - copying the temporary memory to the output while `Backup` itself is serialized
    // The temporary allocation and the copy are redundant here: as an object inside `Backup`,
    // the backup operation is serialized directly into the output.
    Operation backupOp = getBackupOperation(backupAwareOp);
    assertNoBackupOnPrimaryMember(partition, target);

    boolean sync = syncBackups == 1;
    outboundOperationHandler.send(newBackup(backupAwareOp, backupOp, replicaVersions, 1, sync), target);
    return sync ? 1 : 0;
}
/**
 * Serializes the backup operation once and sends it to every replica index from 1 up to and
 * including totalBackups that has an address. The first syncBackups replica indices get a sync backup.
 *
 * @param backupAwareOp the operation to backup.
 * @param partition the partition to look up the replica addresses in.
 * @param replicaVersions the replica versions to pass along with each backup.
 * @param syncBackups the number of sync backups.
 * @param totalBackups the total number of backups (sync and async).
 * @return the number of sync backups that were sent.
 */
private int sendMultipleBackups(BackupAwareOperation backupAwareOp, InternalPartition partition,
                                long[] replicaVersions, int syncBackups, int totalBackups) {
    Operation backupOp = getBackupOperation(backupAwareOp);
    // the same serialized form is reused for every target
    Data serializedBackupOp = nodeEngine.getSerializationService().toData(backupOp);

    int sentSyncCount = 0;
    for (int index = 1; index <= totalBackups; index++) {
        Address target = partition.getReplicaAddress(index);
        if (target != null) {
            assertNoBackupOnPrimaryMember(partition, target);
            boolean sync = index <= syncBackups;
            Backup backup = newBackup(backupAwareOp, serializedBackupOp, replicaVersions, index, sync);
            outboundOperationHandler.send(backup, target);
            if (sync) {
                sentSyncCount++;
            }
        }
    }
    return sentSyncCount;
}
/**
 * Obtains the backup operation from the given operation and gives it the service name of that
 * operation. When assertions are enabled for this class, the service namespace checks are run as well.
 *
 * @param backupAwareOp the operation to get the backup operation from.
 * @return the backup operation, never null.
 * @throws IllegalArgumentException if the operation returns a null backup operation.
 */
private Operation getBackupOperation(BackupAwareOperation backupAwareOp) {
    Operation result = backupAwareOp.getBackupOperation();
    if (result == null) {
        throw new IllegalArgumentException("Backup operation should not be null! " + backupAwareOp);
    }
    if (ASSERTION_ENABLED) {
        checkServiceNamespaces(backupAwareOp, result);
    }
    // set service name of backup operation.
    // if getServiceName() method is overridden to return the same name
    // then this will have no effect.
    String serviceName = ((Operation) backupAwareOp).getServiceName();
    result.setServiceName(serviceName);
    return result;
}
/**
 * Asserts that the operation and its backup operation implement {@link ServiceNamespaceAware}
 * exactly when the service of the operation is a {@link FragmentedMigrationAwareService}.
 *
 * The checks are plain {@code assert} statements. If the service of the operation cannot be
 * obtained, the checks are skipped.
 *
 * @param backupAwareOp the operation that is being backed up.
 * @param backupOp the backup operation created by backupAwareOp.
 */
private void checkServiceNamespaces(BackupAwareOperation backupAwareOp, Operation backupOp) {
    Operation op = (Operation) backupAwareOp;
    Object service;
    try {
        service = op.getService();
    } catch (Exception ignored) {
        // operation doesn't know its service name
        return;
    }
    // The messages name ServiceNamespaceAware because that is the interface being checked.
    if (service instanceof FragmentedMigrationAwareService) {
        assert backupAwareOp instanceof ServiceNamespaceAware
                : service + " is instance of FragmentedMigrationAwareService, "
                + backupAwareOp + " should implement ServiceNamespaceAware!";
        assert backupOp instanceof ServiceNamespaceAware
                : service + " is instance of FragmentedMigrationAwareService, "
                + backupOp + " should implement ServiceNamespaceAware!";
    } else {
        assert !(backupAwareOp instanceof ServiceNamespaceAware)
                : service + " is NOT instance of FragmentedMigrationAwareService, "
                + backupAwareOp + " should NOT implement ServiceNamespaceAware!";
        assert !(backupOp instanceof ServiceNamespaceAware)
                : service + " is NOT instance of FragmentedMigrationAwareService, "
                + backupOp + " should NOT implement ServiceNamespaceAware!";
    }
}
/**
 * Creates the {@link Backup} that carries the given backup operation to a replica.
 *
 * @param backupAwareOp the operation that is being backed up; supplies the caller address, the
 * partition ID and, if it has an active invocation, the call ID.
 * @param backupOp the backup operation, either as an {@link Operation} or as serialized {@link Data}.
 * @param replicaVersions the replica versions to pass along with the backup.
 * @param replicaIndex the replica index the backup is meant for.
 * @param respondBack passed on to the Backup; callers pass true for a sync backup.
 * @return the created backup.
 * @throws IllegalArgumentException if backupOp is neither an Operation nor a Data.
 */
private static Backup newBackup(BackupAwareOperation backupAwareOp, Object backupOp, long[] replicaVersions,
                                int replicaIndex, boolean respondBack) {
    boolean asOperation = backupOp instanceof Operation;
    if (!asOperation && !(backupOp instanceof Data)) {
        throw new IllegalArgumentException("Only 'Data' or 'Operation' typed backup operation is supported!");
    }

    Operation op = (Operation) backupAwareOp;
    Address caller = op.getCallerAddress();
    Backup backup = asOperation
            ? new Backup((Operation) backupOp, caller, replicaVersions, respondBack)
            : new Backup((Data) backupOp, caller, replicaVersions, respondBack);

    backup.setPartitionId(op.getPartitionId()).setReplicaIndex(replicaIndex);
    if (hasActiveInvocation(op)) {
        setCallId(backup, op.getCallId());
    }
    return backup;
}
/**
 * Verifies that the backup of a partition doesn't end up at the member that also has the primary,
 * i.e. that the backup target is not the address of this member.
 *
 * @param partition the partition the backup belongs to; only used in the exception message.
 * @param target the address the backup is about to be sent to.
 * @throws IllegalStateException if target equals the address of this member.
 */
private void assertNoBackupOnPrimaryMember(InternalPartition partition, Address target) {
    if (!target.equals(node.getThisAddress())) {
        return;
    }
    throw new IllegalStateException("Normally shouldn't happen! Owner node and backup node "
            + "are the same! " + partition);
}
}