package org.radargun.stages.cache.background; import java.util.HashMap; import java.util.Map; import java.util.Random; import org.radargun.stages.helpers.Range; import org.radargun.traits.BasicOperations; import org.radargun.utils.TimeService; import org.radargun.utils.Utils; import static org.radargun.stages.cache.background.LogChecker.LastOperation; /** * Logic based on log values. The general idea is that each operation on an entry * should be persisted in the value by appending the operation id to the value. * Therefore, the value works as a log. Old operations (confirmed on all nodes * are eventually unwound from the value). * * The key used for each operation in given stressor is deterministic. Instance * of Random with known seed is used on each node. * * The stressors (executing this logic) and {@link LogChecker checkers} * are synchronized by writing into special values. Once in a while, each stressor * confirms that it has written all operations with lower id by updating * the stressor_* entry. Also, it checks all checker_* keys to see that it can * unwind old records. * * When either stressor or checker is restarted, it should continue with the sequence * from the last confirmed point. Therefore, the stressor_* entry contains the current * seed of the Random and the stressor can load it. * Similar situation happens when the transaction is rolled back. We have to remember * the seeds before the transaction in order to be able to repeat it. * * @author Radim Vansa <rvansa@redhat.com> */ abstract class AbstractLogLogic<ValueType> extends AbstractLogic { protected final BasicOperations.Cache nonTxBasicCache; protected BasicOperations.Cache basicCache; protected final Random operationTypeRandom = new Random(); protected Range keyRange; protected Random keySelectorRandom; protected volatile long operationId = 0; protected volatile long keyId; protected Map<Long, DelayedRemove> delayedRemoves = new HashMap<>(); private volatile long txStartOperationId; private volatile long txStartKeyId = -1; private long txStartRandSeed; private boolean txRolledBack = false; private volatile long lastSuccessfulOpTimestamp; private volatile long lastSuccessfulTxTimestamp; private int remainingTxOps; private volatile long lastConfirmedOperation = -1; private volatile int txFailedAttempts; public AbstractLogLogic(BackgroundOpsManager manager, Range keyRange) { super(manager); this.keyRange = keyRange; this.nonTxBasicCache = manager.getBasicCache(); if (transactionSize <= 0) { basicCache = nonTxBasicCache; } remainingTxOps = transactionSize; } /** * This operation has to be called within the stressor thread and should be sensitive to interruptions */ public void init() { Random random = null; while (random == null && !Thread.currentThread().isInterrupted()) { try { LastOperation lastOperation = (LastOperation) nonTxBasicCache.get(LogChecker.lastOperationKey(stressor.id)); if (lastOperation != null) { operationId = lastOperation.getOperationId() + 1; random = Utils.setRandomSeed(new Random(0), lastOperation.getSeed()); log.debugf("Restarting operations from operation %d", operationId); } else { log.tracef("Initializing stressor random with %d", stressor.id); random = new Random(stressor.id); } } catch (Exception e) { // exception cannot be understood as 0 because of AvailabilityExceptions log.error("Failure getting last operation", e); } } keySelectorRandom = random; } @Override public void invoke() throws InterruptedException { keyId = (keySelectorRandom.nextLong() & Long.MAX_VALUE) % keyRange.getSize() + keyRange.getStart(); do { if (txRolledBack) { keyId = txStartKeyId; operationId = txStartOperationId; Utils.setRandomSeed(keySelectorRandom, txStartRandSeed); txRolledBack = false; txFailedAttempts++; log.tracef("Transaction rollbacked, number of attempts so far=%d", txFailedAttempts); if (manager.getLogLogicConfiguration().getMaxTransactionAttempts() >= 0 && txFailedAttempts > manager.getLogLogicConfiguration().getMaxTransactionAttempts()) { log.error("Maximum number of transaction attempts attained, reporting."); manager.getFailureManager().reportFailedTransactionAttempt(); } } if (trace) { log.tracef("Operation %d on key %s", operationId, keyGenerator.generateKey(keyId)); } } while (!invokeOn(keyId) && !stressor.isInterrupted() && !stressor.isTerminated()); operationId++; } @Override public String getStatus() { long currentTime = TimeService.currentTimeMillis(); return String.format("current[id=%d, key=%s], lastSuccessfulOpTime=%d", operationId, keyGenerator.generateKey(keyId), lastSuccessfulOpTimestamp - currentTime) + (transactionSize > 0 ? String.format(", txStart[id=%d, key=%s], remainingTxOps=%d, lastSuccessfulTxTime=%d", txStartOperationId, keyGenerator.generateKey(txStartKeyId), remainingTxOps, lastSuccessfulTxTimestamp - currentTime) : ""); } /* Return value = true: follow with next operation, false: txRolledBack ? restart from txStartOperationId : retry operationId */ protected boolean invokeOn(long keyId) throws InterruptedException { try { if (transactionSize > 0 && remainingTxOps == transactionSize) { txStartOperationId = operationId; txStartKeyId = keyId; // we could serialize & deserialize instead, but that's not much better txStartRandSeed = Utils.getRandomSeed(keySelectorRandom); startTransaction(); } boolean txBreakRequest = false; try { if (!invokeLogic(keyId)) return false; } catch (BreakTxRequest request) { txBreakRequest = true; } lastSuccessfulOpTimestamp = TimeService.currentTimeMillis(); // for non-transactional caches write the stressor last operation anytime (once in a while) if (transactionSize <= 0 && operationId % manager.getLogLogicConfiguration().getCounterUpdatePeriod() == 0) { writeStressorLastOperation(); lastConfirmedOperation = operationId; } if (transactionSize > 0) { remainingTxOps--; if (remainingTxOps <= 0 || txBreakRequest) { try { ongoingTx.commit(); lastSuccessfulTxTimestamp = TimeService.currentTimeMillis(); txFailedAttempts = 0; } catch (Exception e) { log.debugf("Transaction %s was rolled back, restarting from operation %d", ongoingTx, txStartOperationId); txRolledBack = true; afterRollback(); return false; } finally { remainingTxOps = transactionSize; clearTransaction(); } if (stressor.isTerminated()) { // If the thread was interrupted and cache is registered as Synchronization (not XAResource) // commit phase may fail but no exception is thrown. Therefore, we should terminate immediatelly // as we don't want to remove entries while the modifications have not been written. log.debugf("Stressor %s is about to terminate, not executing delayed removes", stressor.getStatus()); return false; } afterCommit(); if (stressor.isTerminated()) { // the removes may have failed and we have not repeated them due to termination log.debugf("Stressor %s is about to terminate, not writing the last operation %d", stressor.getStatus(), operationId); return false; } if (txBreakRequest) { log.debugf("Transaction was committed sooner, retrying operation %d", operationId); return false; } // for non-transactional caches write the stressor last operation only after the transaction // has finished try { startTransaction(); writeStressorLastOperation(); ongoingTx.commit(); lastConfirmedOperation = operationId; } catch (Exception e) { log.error("Cannot write stressor last operation", e); } finally { clearTransaction(); } } } return true; } catch (Exception e) { InterruptedException ie = Utils.findThrowableCauseByClass(e, InterruptedException.class); if (ie != null) { throw ie; } else if (e.getClass().getName().contains("SuspectException")) { log.error("Request failed due to SuspectException: " + e.getMessage()); } else { log.error("Cache operation error", e); } if (transactionSize > 0 && ongoingTx != null) { try { ongoingTx.rollback(); log.errorf("Transaction %s rolled back", ongoingTx); } catch (Exception e1) { log.errorf(e1, "Error while rolling back transaction %s", ongoingTx); } finally { log.debugf("Restarting from operation %d, current operation %d", txStartOperationId, operationId); clearTransaction(); remainingTxOps = transactionSize; txRolledBack = true; afterRollback(); } } return false; // on the same key } } protected void afterRollback() { delayedRemoves.clear(); } protected boolean afterCommit() { try { int delayedRemoveAttempts = 0; while (!stressor.isTerminated()) { boolean delayedRemoveError = false; try { if (ongoingTx != null) { try { ongoingTx.rollback(); } catch (Exception e) { log.error("Failed to rollback ongoing transaction", e); } } startTransaction(); for (DelayedRemove delayedRemove : delayedRemoves.values()) { try { // avoid infinite loops -> try 'maxDelayedRemoveAttempts' times if (manager.getLogLogicConfiguration().getMaxDelayedRemoveAttempts() >= 0 && delayedRemoveAttempts > manager.getLogLogicConfiguration().getMaxDelayedRemoveAttempts()) { log.errorf("Maximum number of delayed remove attempts on key %s attained, reporting.", keyGenerator.generateKey(delayedRemove.keyId)); manager.getFailureManager().reportDelayedRemoveError(); stressor.requestTerminate(); return false; } checkedRemoveValue(delayedRemove.keyId, delayedRemove.oldValue); } catch (Exception e) { if (manager.getLogLogicConfiguration().getMaxDelayedRemoveAttempts() >= 0) { delayedRemoveAttempts++; } delayedRemoveError = true; throw e; } } ongoingTx.commit(); lastSuccessfulTxTimestamp = TimeService.currentTimeMillis(); delayedRemoves.clear(); return true; } catch (Exception e) { // Record exceptions not originating from checkedRemoveValue (e.g. tx.commit) if (!delayedRemoveError && manager.getLogLogicConfiguration().getMaxDelayedRemoveAttempts() >= 0) { delayedRemoveAttempts++; } log.error("Error while executing delayed removes.", e); } finally { delayedRemoveError = false; } } } finally { clearTransaction(); } return true; } protected void startTransaction() { ongoingTx = manager.newTransaction(); basicCache = ongoingTx.wrap(nonTxBasicCache); ongoingTx.begin(); } protected void clearTransaction() { ongoingTx = null; basicCache = null; } protected void delayedRemoveValue(long keyId, ValueType prevValue) throws Exception { if (transactionSize <= 0) { checkedRemoveValue(keyId, prevValue); } else { // if we moved around the key within one transaction multiple times we don't want to delete the complement delayedRemoves.remove(~keyId); delayedRemoves.put(keyId, new DelayedRemove(keyId, prevValue)); } } protected abstract boolean checkedRemoveValue(long keyId, ValueType oldValue) throws Exception; protected void writeStressorLastOperation() { try { // we have to write down the keySelectorRandom as well in order to be able to continue work if this slave // is restarted basicCache.put(LogChecker.lastOperationKey(stressor.id), new LastOperation(operationId, Utils.getRandomSeed(keySelectorRandom))); } catch (Exception e) { log.errorf(e, "Error while writing last operation %d for stressor %s", operationId, stressor.getStatus()); } } protected abstract boolean invokeLogic(long keyId) throws Exception; /** * Returns minimum of checked (confirmed) operations for given stressor thread across all nodes. */ protected long getCheckedOperation(int stressorId, long operationId) throws StressorException, BreakTxRequest { long minCheckedOperation = Long.MAX_VALUE; for (int i = 0; i < manager.getSlaveState().getGroupSize(); ++i) { long lastCheckedOperationId = Long.MIN_VALUE; try { LastOperation lastOperation = (LastOperation) basicCache.get(LogChecker.checkerKey(i, stressorId)); if (lastOperation != null) { lastCheckedOperationId = lastOperation.getOperationId(); } } catch (Exception e) { log.errorf(e, "Cannot read last checked operation id for slave %d, stressor %d", i, stressorId); throw new StressorException(e); } if (lastCheckedOperationId < operationId && manager.getLogLogicConfiguration().isIgnoreDeadCheckers() && !manager.isSlaveAlive(i)) { try { Long ignoredOperationId = (Long) basicCache.get(LogChecker.ignoredKey(i, stressorId)); if (ignoredOperationId == null || ignoredOperationId < operationId) { log.tracef("Setting ignore operation for checker slave %d and stressor %d: %d -> %d (last checked operation %d)", i, stressorId, ignoredOperationId, operationId, lastCheckedOperationId); basicCache.put(LogChecker.ignoredKey(i, stressorId), operationId); if (transactionSize > 0) { throw new BreakTxRequest(); } } minCheckedOperation = Math.min(minCheckedOperation, operationId); } catch (BreakTxRequest request) { throw request; } catch (Exception e) { log.errorf(e, "Cannot overwrite ignored operation id for slave %d", "stressor %d", i, stressorId); throw new StressorException(e); } } else { minCheckedOperation = Math.min(minCheckedOperation, lastCheckedOperationId); } } return minCheckedOperation; } public long getLastConfirmedOperation() { return lastConfirmedOperation; } protected class DelayedRemove { public final long keyId; public final ValueType oldValue; protected DelayedRemove(long keyId, ValueType oldValue) { this.keyId = keyId; this.oldValue = oldValue; } } }