/*
 * Copyright (C) 2012-2015 DataStax Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.datastax.driver.core.policies;

import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.driver.core.Statement;
import com.datastax.driver.core.WriteType;
import com.datastax.driver.core.exceptions.DriverException;

/**
 * The default retry policy.
 * <p/>
 * This policy retries queries in only a few, generally safe cases:
 * <ul>
 * <li>On a read timeout, if enough replicas replied but data was not retrieved.</li>
 * <li>On a write timeout, if the timeout occurred while writing the distributed log used by batch statements.</li>
 * <li>On an unavailable exception or an unexpected request error, by moving on to the next host in the query plan.</li>
 * </ul>
 * <p/>
 * This retry policy is conservative in that it will never retry with a
 * different consistency level than that of the initial operation.
 * <p/>
 * In some cases, it may be convenient to use a more aggressive retry policy
 * like {@link DowngradingConsistencyRetryPolicy}.
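 * <p/>
 * For example, to use this policy for all queries of a {@code Cluster} (a minimal sketch;
 * the contact point is illustrative):
 * <pre>{@code
 * Cluster cluster = Cluster.builder()
 *     .addContactPoint("127.0.0.1")
 *     .withRetryPolicy(DefaultRetryPolicy.INSTANCE)
 *     .build();
 * }</pre>
 * To log the decisions this policy makes, it can be wrapped in a {@link LoggingRetryPolicy}:
 * <pre>{@code
 * Cluster cluster = Cluster.builder()
 *     .addContactPoint("127.0.0.1")
 *     .withRetryPolicy(new LoggingRetryPolicy(DefaultRetryPolicy.INSTANCE))
 *     .build();
 * }</pre>
 * The policy can also be overridden for individual queries with {@link Statement#setRetryPolicy}.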
 */
public class DefaultRetryPolicy implements RetryPolicy {

    public static final DefaultRetryPolicy INSTANCE = new DefaultRetryPolicy();

    private DefaultRetryPolicy() {
    }

    /**
     * {@inheritDoc}
     * <p/>
     * This implementation triggers a maximum of one retry, and only if enough
     * replicas had responded to the read request but data was not retrieved
     * amongst those. That case usually means that enough replicas are alive to
     * satisfy the consistency level, but the coordinator picked a dead one for
     * data retrieval, not having yet detected that replica as dead. The
     * reasoning for retrying then is that by the time we get the timeout, the
     * dead replica will likely have been detected as dead, and the retry has a
     * high chance of success.
     *
     * @return {@code RetryDecision.retry(cl)} if no retry has been attempted yet and
     * {@code receivedResponses >= requiredResponses && !dataRetrieved}, {@code RetryDecision.rethrow()} otherwise.
     */
    @Override
    public RetryDecision onReadTimeout(Statement statement, ConsistencyLevel cl, int requiredResponses, int receivedResponses, boolean dataRetrieved, int nbRetry) {
        if (nbRetry != 0)
            return RetryDecision.rethrow();
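        // Retry (at the same consistency level) only if enough replicas responded but the data
        // itself was not retrieved; see the Javadoc above for the rationale.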
        return receivedResponses >= requiredResponses && !dataRetrieved ? RetryDecision.retry(cl) : RetryDecision.rethrow();
    }

    /**
     * {@inheritDoc}
     * <p/>
     * This implementation triggers a maximum of one retry, and only in the case
     * of a {@code WriteType.BATCH_LOG} write. The reasoning for retrying in
     * that case is that a write to the distributed batch log is attempted by
     * the coordinator against a small subset of all the nodes alive in the
     * local datacenter. Hence, a timeout usually means that none of the nodes
     * in that subset were alive, but the coordinator had not yet detected them
     * as dead. By the time we get the timeout, the dead nodes will likely have
     * been detected as dead, and the retry thus has a high chance of success.
     *
     * @return {@code RetryDecision.retry(cl)} if no retry has been attempted yet and
     * {@code writeType == WriteType.BATCH_LOG}, {@code RetryDecision.rethrow()} otherwise.
     */
    @Override
    public RetryDecision onWriteTimeout(Statement statement, ConsistencyLevel cl, WriteType writeType, int requiredAcks, int receivedAcks, int nbRetry) {
        if (nbRetry != 0)
            return RetryDecision.rethrow();
        // If the batch log write failed, retry the operation, as this might just mean we were unlucky
        // when picking candidates.
        // JAVA-764: testing the write type automatically filters out serial consistency levels, as these
        // always have WriteType.CAS.
        return writeType == WriteType.BATCH_LOG ? RetryDecision.retry(cl) : RetryDecision.rethrow();
    }

    /**
     * {@inheritDoc}
     * <p/>
     * This implementation does the following:
     * <ul>
     * <li>if this is the first retry ({@code nbRetry == 0}), it triggers a retry on the next host in the query plan
     * with the same consistency level ({@link RetryPolicy.RetryDecision#tryNextHost(ConsistencyLevel) RetryDecision.tryNextHost(null)}).
     * The rationale is that the first coordinator might have been network-isolated from all other nodes (thinking
     * they're down), but still able to communicate with the client; in that case, retrying on the same host has almost
     * no chance of success, but moving to the next host might solve the issue.</li>
     * <li>otherwise, the exception is rethrown.</li>
     * </ul>
     */
    @Override
    public RetryDecision onUnavailable(Statement statement, ConsistencyLevel cl, int requiredReplica, int aliveReplica, int nbRetry) {
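        // Retry once on the next host: the original coordinator might be network-isolated from the
        // rest of the cluster while still reachable by the client. Passing null keeps the same
        // consistency level for the retry.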
        return (nbRetry == 0)
                ? RetryDecision.tryNextHost(null)
                : RetryDecision.rethrow();
    }

    /**
     * {@inheritDoc}
     * <p/>
     * This implementation always retries the query on the next host in the query plan with the
     * same consistency level, regardless of the number of previous retry attempts.
     */
    @Override
    public RetryDecision onRequestError(Statement statement, ConsistencyLevel cl, DriverException e, int nbRetry) {
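        // Unexpected errors (such as a connection error) are often specific to the coordinator that
        // reported them, so always move on to the next host in the query plan.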
        return RetryDecision.tryNextHost(cl);
    }

    @Override
    public void init(Cluster cluster) {
        // nothing to do
    }

    @Override
    public void close() {
        // nothing to do
    }
}