/*
* Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.spi.impl.operationservice.impl;
import com.hazelcast.config.Config;
import com.hazelcast.core.ExecutionCallback;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.nio.ObjectDataInput;
import com.hazelcast.nio.ObjectDataOutput;
import com.hazelcast.spi.BackupAwareOperation;
import com.hazelcast.spi.BackupOperation;
import com.hazelcast.spi.InternalCompletableFuture;
import com.hazelcast.spi.Operation;
import com.hazelcast.test.AssertTask;
import com.hazelcast.test.HazelcastSerialClassRunner;
import com.hazelcast.test.HazelcastTestSupport;
import com.hazelcast.test.TestThread;
import com.hazelcast.test.annotation.NightlyTest;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import java.io.IOException;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import static com.hazelcast.spi.properties.GroupProperty.BACKPRESSURE_ENABLED;
import static com.hazelcast.spi.properties.GroupProperty.BACKPRESSURE_MAX_CONCURRENT_INVOCATIONS_PER_PARTITION;
import static com.hazelcast.spi.properties.GroupProperty.BACKPRESSURE_SYNCWINDOW;
import static com.hazelcast.spi.properties.GroupProperty.OPERATION_BACKUP_TIMEOUT_MILLIS;
import static com.hazelcast.spi.properties.GroupProperty.PARTITION_COUNT;
import static java.util.concurrent.TimeUnit.MINUTES;
import static org.junit.Assert.assertEquals;
@RunWith(HazelcastSerialClassRunner.class)
@Category(NightlyTest.class)
public class BackpressureRegulatorStressTest extends HazelcastTestSupport {
// To stress the back-pressure (and to expose problems quickly) some additional data is attached to each
// operation. If many operations are being stored (so no back pressure) then you will very quickly run into an OOME.
// So by increasing this value, you increase the chance of seeing problems.
// On my machine with MEMORY_STRESS_PAYLOAD_SIZE=100000 it takes a very short time (under a minute) to run out of memory if
// back pressure is disabled. If you run these tests using a profiler, make sure you keep an eye on memory usage and GC
// activity. It is very easy to detect when back pressure is disabled.
public static final int MEMORY_STRESS_PAYLOAD_SIZE = 100000;
// 5 minutes expressed in seconds; handed to sleepAndStop(...) together with the stop flag
private static final int runningTimeSeconds = (int) MINUTES.toSeconds(5);
// supplies the expected result value that each operation carries
private final Random random = new Random();
// incremented once per invocation by the stress thread's sync and async invoke paths
private final AtomicLong completedCall = new AtomicLong();
// incremented when an invocation fails or returns a value other than the expected result
private final AtomicLong failedOperationCount = new AtomicLong();
// incremented once per iteration of the stress loop, after the invocation has been issued
private final AtomicLong globalOperationCount = new AtomicLong();
// polled by the stress loop as its exit condition; also set when the stress thread reports an error
private final AtomicBoolean stop = new AtomicBoolean();
// first member of the two-member cluster; its operation service issues the invocations
private HazelcastInstance local;
// second member of the cluster; the test scenarios pass it to getPartitionId(...) to choose the target partition
private HazelcastInstance remote;
// operation service of the local member, assigned in setup()
private OperationServiceImpl localOperationService;
/**
 * Starts a two-member cluster with back pressure enabled and remembers both members as well as
 * the operation service of the first one.
 */
@Before
public void setup() {
    Config backpressureConfig = new Config();
    backpressureConfig.setProperty(OPERATION_BACKUP_TIMEOUT_MILLIS.getName(), "60000");
    backpressureConfig.setProperty(BACKPRESSURE_ENABLED.getName(), "true");
    backpressureConfig.setProperty(BACKPRESSURE_SYNCWINDOW.getName(), "10");
    backpressureConfig.setProperty(BACKPRESSURE_MAX_CONCURRENT_INVOCATIONS_PER_PARTITION.getName(), "2");
    backpressureConfig.setProperty(PARTITION_COUNT.getName(), "10");

    HazelcastInstance[] members = createHazelcastInstanceFactory(2).newInstances(backpressureConfig);
    local = members[0];
    remote = members[1];

    localOperationService = (OperationServiceImpl) getOperationService(local);
}
/**
 * Scenario: asynchronous invocations with backups switched off; the operation's run() sleeps for 1 ms.
 */
@Test(timeout = 600000)
public void asyncInvocation() throws Exception {
    StressThreadFactory factory = new StressThreadFactory() {
        @Override
        public StressThread create() {
            StressThread thread = new StressThread();
            // invocation side
            thread.syncInvocation = false;
            thread.returnsResponse = true;
            thread.runDelayMs = 1;
            // backup side: nothing is backed up in this scenario
            thread.shouldBackup = false;
            thread.syncBackups = 0;
            thread.asyncBackups = 0;
            thread.backupRunDelayMs = 0;
            // target partition is chosen via the remote member
            thread.partitionId = getPartitionId(remote);
            return thread;
        }
    };
    test(factory);
}
/**
 * Scenario: asynchronous invocations, each with one synchronous backup whose run() sleeps for 1 ms.
 *
 * @throws Exception if the stress run or one of its assertions fails
 */
@Test(timeout = 600000)
public void asyncInvocation_and_syncBackups() throws Exception {
    test(new StressThreadFactory() {
        @Override
        public StressThread create() {
            StressThread stressThread = new StressThread();
            stressThread.returnsResponse = true;
            stressThread.syncInvocation = false;
            stressThread.runDelayMs = 0;
            // Must be true: DummyOperation.shouldBackup() returns this flag, so with false the
            // configured sync backup would never be requested and this scenario would not
            // exercise backups at all.
            stressThread.shouldBackup = true;
            stressThread.asyncBackups = 0;
            stressThread.syncBackups = 1;
            stressThread.backupRunDelayMs = 1;
            stressThread.partitionId = getPartitionId(remote);
            return stressThread;
        }
    });
}
/**
 * Scenario: asynchronous invocations, each with one asynchronous backup whose run() sleeps for 1 ms.
 */
@Test(timeout = 600000)
public void asyncInvocation_and_asyncBackups() throws Exception {
    StressThreadFactory factory = new StressThreadFactory() {
        @Override
        public StressThread create() {
            StressThread thread = new StressThread();
            // invocation side
            thread.syncInvocation = false;
            thread.returnsResponse = true;
            thread.runDelayMs = 0;
            // backup side: a single async backup
            thread.shouldBackup = true;
            thread.syncBackups = 0;
            thread.asyncBackups = 1;
            thread.backupRunDelayMs = 1;
            // target partition is chosen via the remote member
            thread.partitionId = getPartitionId(remote);
            return thread;
        }
    };
    test(factory);
}
/**
 * Scenario: synchronous invocations, each with one asynchronous backup whose run() sleeps for 1 ms.
 */
@Test(timeout = 600000)
public void syncInvocation_and_asyncBackups() throws Exception {
    StressThreadFactory factory = new StressThreadFactory() {
        @Override
        public StressThread create() {
            StressThread thread = new StressThread();
            // invocation side: the stress thread waits for every result
            thread.syncInvocation = true;
            thread.returnsResponse = true;
            thread.runDelayMs = 0;
            // backup side: a single async backup
            thread.shouldBackup = true;
            thread.syncBackups = 0;
            thread.asyncBackups = 1;
            thread.backupRunDelayMs = 1;
            // target partition is chosen via the remote member
            thread.partitionId = getPartitionId(remote);
            return thread;
        }
    };
    test(factory);
}
/**
 * Scenario: asynchronous invocations, each with one synchronous and one asynchronous backup;
 * the backup's run() sleeps for 1 ms.
 */
@Test(timeout = 600000)
public void asyncInvocation_and_syncBackups_and_asyncBackups() throws Exception {
    StressThreadFactory factory = new StressThreadFactory() {
        @Override
        public StressThread create() {
            StressThread thread = new StressThread();
            // invocation side
            thread.syncInvocation = false;
            thread.returnsResponse = true;
            thread.runDelayMs = 0;
            // backup side: one sync plus one async backup
            thread.shouldBackup = true;
            thread.syncBackups = 1;
            thread.asyncBackups = 1;
            thread.backupRunDelayMs = 1;
            // target partition is chosen via the remote member
            thread.partitionId = getPartitionId(remote);
            return thread;
        }
    };
    test(factory);
}
/**
 * Runs one stress scenario: starts the thread produced by the factory, hands the stop flag and the
 * configured running time to {@code sleepAndStop}, and then verifies that the completed-call counter
 * catches up with the issued-operation counter and that no failure was recorded.
 *
 * @param stressThreadFactory produces the configured stress thread for the scenario
 * @throws Exception if the stress thread or one of the assertions fails
 */
public void test(StressThreadFactory stressThreadFactory) throws Exception {
    final StressThread worker = stressThreadFactory.create();
    worker.start();

    sleepAndStop(stop, runningTimeSeconds);
    worker.assertSucceedsEventually();

    System.out.println("Completed with asynchronous calls, waiting for everything to complete");

    AssertTask allCallsCompleted = new AssertTask() {
        @Override
        public void run() throws Exception {
            long issued = globalOperationCount.get();
            long completed = completedCall.get();
            assertEquals("the number of completed calls doesn't match the number of expected calls",
                    issued, completed);
        }
    };
    assertTrueEventually(allCallsCompleted);

    assertEquals(0, failedOperationCount.get());

    // Disabled diagnostic output, kept for reference:
    // long count = localOperationService.backPressureService.backPressureCount();
    // System.out.println("Backpressure count: " + count);
}
// Source of the numeric suffix used in stress thread names.
private static final AtomicLong THREAD_ID_GENERATOR = new AtomicLong();

/**
 * Thread that keeps invoking {@link DummyOperation}s on a single partition through the local
 * operation service until the shared stop flag is set.
 * <p>
 * The public fields are the scenario settings; they are copied onto every operation that is created.
 * Results are tracked through the enclosing test's counters: a failure is always recorded before
 * the call is counted as completed, so a reader that sees all calls completed also sees every failure.
 */
private class StressThread extends TestThread {
    /** Partition every operation is invoked on. */
    public int partitionId;
    /** When true the thread waits for each result, otherwise it registers a callback. */
    public boolean syncInvocation;
    /** Value returned by {@code getAsyncBackupCount()} of the created operations. */
    public int asyncBackups;
    /** Value returned by {@code getSyncBackupCount()} of the created operations. */
    public int syncBackups;
    /** Value returned by {@code shouldBackup()} of the created operations. */
    public boolean shouldBackup;
    /** Value returned by {@code returnsResponse()} of the created operations. */
    public boolean returnsResponse;
    /** Sleep time in milliseconds of the operation's {@code run()}. */
    public int runDelayMs = 1;
    /** Sleep time in milliseconds of the backup operation's {@code run()}. */
    public int backupRunDelayMs = 0;

    public StressThread() {
        super("StressThread-" + THREAD_ID_GENERATOR.incrementAndGet());
    }

    /**
     * Sets the shared stop flag when this thread reports an error.
     */
    @Override
    public void onError(Throwable t) {
        stop.set(true);
    }

    /**
     * Issues operations in a loop until the stop flag is set, printing the running count whenever
     * the wall-clock second changes.
     */
    @Override
    public void doRun() {
        long operationCount = 0;
        long lastSecond = System.currentTimeMillis() / 1000;
        while (!stop.get()) {
            long currentSecond = System.currentTimeMillis() / 1000;
            if (currentSecond != lastSecond) {
                lastSecond = currentSecond;
                System.out.println(" at: " + operationCount);
            }

            long expectedResult = random.nextLong();
            DummyOperation operation = new DummyOperation(expectedResult);
            operation.returnsResponse = returnsResponse;
            operation.syncBackups = syncBackups;
            operation.asyncBackups = asyncBackups;
            operation.runDelayMs = runDelayMs;
            operation.backupRunDelayMs = backupRunDelayMs;
            operation.shouldBackup = shouldBackup;

            if (syncInvocation) {
                syncInvoke(operation);
            } else {
                asyncInvoke(operation);
            }

            operationCount++;
            globalOperationCount.incrementAndGet();
        }
    }

    /**
     * Invokes the operation and checks its result in a callback.
     *
     * @param operation the operation to invoke; its {@code result} is the expected response
     */
    private void asyncInvoke(DummyOperation operation) {
        final long expectedResult = operation.result;

        InternalCompletableFuture<Object> future
                = localOperationService.invokeOnPartition(null, operation, partitionId);
        future.andThen(new ExecutionCallback<Object>() {
            @Override
            public void onResponse(Object response) {
                if (!Long.valueOf(expectedResult).equals(response)) {
                    System.out.println("Wrong result received, expecting: " + expectedResult + " but found:" + response);
                    failedOperationCount.incrementAndGet();
                }
                // count the completion last, so the failure above is already visible once
                // the completed count matches the issued count
                completedCall.incrementAndGet();
            }

            @Override
            public void onFailure(Throwable t) {
                failedOperationCount.incrementAndGet();
                t.printStackTrace();
                completedCall.incrementAndGet();
            }
        });
    }

    /**
     * Invokes the operation, waits for the result and checks it.
     *
     * @param operation the operation to invoke; its {@code result} is the expected response
     */
    private void syncInvoke(DummyOperation operation) {
        final Long expectedResult = operation.result;

        InternalCompletableFuture<Long> future
                = localOperationService.invokeOnPartition(null, operation, partitionId);
        try {
            Long result = future.join();
            if (!expectedResult.equals(result)) {
                System.out.println("Wrong result received, expecting: " + expectedResult + " but found:" + result);
                failedOperationCount.incrementAndGet();
            }
        } catch (Exception e) {
            failedOperationCount.incrementAndGet();
            e.printStackTrace();
        } finally {
            // the call only counts as completed once join() has returned or thrown
            completedCall.incrementAndGet();
        }
    }
}
/**
 * Supplies the configured {@link StressThread} for one test scenario.
 */
private interface StressThreadFactory {
// returns the stress thread to run; the caller is responsible for starting it
StressThread create();
}
/**
 * Backup-aware operation used to generate load: {@code run()} only sleeps, and the response is the
 * {@code result} value the operation was created with. Its serialized form additionally carries a
 * zero-filled byte array of {@code MEMORY_STRESS_PAYLOAD_SIZE} bytes, which is discarded on read.
 */
static class DummyOperation extends Operation implements BackupAwareOperation {
    long result;
    int asyncBackups;
    int syncBackups;
    boolean shouldBackup;
    boolean returnsResponse = true;
    int runDelayMs = 1;
    int backupRunDelayMs;

    /** No-arg constructor; all fields keep their defaults until {@code readInternal} fills them. */
    public DummyOperation() {
    }

    public DummyOperation(long result) {
        this.result = result;
    }

    /** Sleeps for {@code runDelayMs} milliseconds. */
    @Override
    public void run() throws Exception {
        Thread.sleep(this.runDelayMs);
    }

    @Override
    public Object getResponse() {
        return this.result;
    }

    @Override
    public boolean returnsResponse() {
        return this.returnsResponse;
    }

    @Override
    public boolean shouldBackup() {
        return this.shouldBackup;
    }

    @Override
    public int getSyncBackupCount() {
        return this.syncBackups;
    }

    @Override
    public int getAsyncBackupCount() {
        return this.asyncBackups;
    }

    /** Creates a backup operation whose sleep time is this operation's {@code backupRunDelayMs}. */
    @Override
    public Operation getBackupOperation() {
        DummyBackupOperation backup = new DummyBackupOperation();
        backup.runDelayMs = this.backupRunDelayMs;
        return backup;
    }

    @Override
    protected void writeInternal(ObjectDataOutput out) throws IOException {
        super.writeInternal(out);
        // the field order here must mirror readInternal
        out.writeLong(this.result);
        out.writeBoolean(this.returnsResponse);
        out.writeInt(this.runDelayMs);
        out.writeBoolean(this.shouldBackup);
        out.writeInt(this.syncBackups);
        out.writeInt(this.asyncBackups);
        out.writeInt(this.backupRunDelayMs);
        // the stress payload: a fresh zero-filled array on every write
        out.writeByteArray(new byte[MEMORY_STRESS_PAYLOAD_SIZE]);
    }

    @Override
    protected void readInternal(ObjectDataInput in) throws IOException {
        super.readInternal(in);
        // the field order here must mirror writeInternal
        this.result = in.readLong();
        this.returnsResponse = in.readBoolean();
        this.runDelayMs = in.readInt();
        this.shouldBackup = in.readBoolean();
        this.syncBackups = in.readInt();
        this.asyncBackups = in.readInt();
        this.backupRunDelayMs = in.readInt();
        // consume the stress payload; its content is not kept
        in.readByteArray();
    }
}
/**
 * Backup counterpart of the dummy operation: {@code run()} only sleeps for {@code runDelayMs}
 * milliseconds. Its serialized form carries a zero-filled byte array of
 * {@code MEMORY_STRESS_PAYLOAD_SIZE} bytes, which is discarded on read.
 */
public static class DummyBackupOperation extends Operation implements BackupOperation {
    private int runDelayMs;

    @Override
    public void run() throws Exception {
        Thread.sleep(this.runDelayMs);
    }

    @Override
    protected void writeInternal(ObjectDataOutput out) throws IOException {
        super.writeInternal(out);
        out.writeInt(this.runDelayMs);
        // the stress payload: a fresh zero-filled array on every write
        out.writeByteArray(new byte[MEMORY_STRESS_PAYLOAD_SIZE]);
    }

    @Override
    protected void readInternal(ObjectDataInput in) throws IOException {
        super.readInternal(in);
        this.runDelayMs = in.readInt();
        // consume the stress payload; its content is not kept
        in.readByteArray();
    }
}
}