/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.service;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.net.InetAddress;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.concurrent.StageManager;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.ColumnFamily;
import org.apache.cassandra.db.ReadCommand;
import org.apache.cassandra.db.ReadResponse;
import org.apache.cassandra.db.Row;
import org.apache.cassandra.io.util.DataOutputBuffer;
import org.apache.cassandra.net.IAsyncCallback;
import org.apache.cassandra.net.Message;
import org.apache.cassandra.net.MessagingService;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.WrappedRunnable;
/**
* ConsistencyChecker does the following:
*
* [ConsistencyChecker.run]
* (1) sends DIGEST read requests to each other replica of the given row.
*
* [DigestResponseHandler]
* (2) If any of the digests do not match the local one, it sends a second round of requests
* to each replica, this time for the full data
*
* [DataRepairHandler]
* (3) processes full-read responses and invokes resolve. The actual sending of messages
* repairing out-of-date or missing data is handled by ReadResponseResolver.
*/
class ConsistencyChecker implements Runnable
{
    private static final Logger logger_ = LoggerFactory.getLogger(ConsistencyChecker.class);

    // Shared single-threaded scheduler; DataRepairHandler uses it to delay resolution by the rpc timeout.
    private static final ScheduledExecutorService executor_ = new ScheduledThreadPoolExecutor(1); // TODO add JMX

    private final String table_;
    private final Row row_;
    protected final List<InetAddress> replicas_;
    private final ReadCommand readCommand_;

    /**
     * @param table       table name handed to the ReadResponseResolver if a repair round is needed
     * @param row         the locally read row; the digest of its column family is what remote digests are compared to
     * @param endpoints   the replicas to check; asserted to contain the local address
     * @param readCommand the original read; it is copied (never modified) to build the digest and data re-reads
     */
    public ConsistencyChecker(String table, Row row, List<InetAddress> endpoints, ReadCommand readCommand)
    {
        table_ = table;
        row_ = row;
        replicas_ = endpoints;
        readCommand_ = readCommand;
        assert replicas_.contains(FBUtilities.getLocalAddress());
    }

    /**
     * Step (1): sends a digest-only copy of the read to every replica other than this node and
     * registers a DigestResponseHandler for the replies.
     *
     * @throws RuntimeException wrapping any IOException raised while building or sending the message
     */
    public void run()
    {
        ReadCommand digestCommand = constructReadMessage(true);
        try
        {
            Message message = digestCommand.makeReadMessage();
            if (logger_.isDebugEnabled())
                logger_.debug("Reading consistency digest for " + readCommand_.key + " from " + message.getMessageId() + "@[" + StringUtils.join(replicas_, ", ") + "]");
            // the callback is registered before anything is sent, as in every send path of this class
            MessagingService.instance.addCallback(new DigestResponseHandler(), message.getMessageId());
            sendToRemoteReplicas(message);
        }
        catch (IOException ex)
        {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Sends the given message one-way to every endpoint in replicas_ except the local address.
     *
     * Declared with IOException so that both callers keep routing any checked I/O failure
     * through the try/catch blocks they already have.
     */
    private void sendToRemoteReplicas(Message message) throws IOException
    {
        InetAddress localAddress = FBUtilities.getLocalAddress();
        for (InetAddress endpoint : replicas_)
        {
            if (!endpoint.equals(localAddress))
                MessagingService.instance.sendOneWay(message, endpoint);
        }
    }

    /**
     * Returns a copy of the original read command with its digest flag set as requested;
     * the original command is left untouched.
     */
    private ReadCommand constructReadMessage(boolean isDigestQuery)
    {
        ReadCommand readCommand = readCommand_.copy();
        readCommand.setDigestQuery(isDigestQuery);
        return readCommand;
    }

    /**
     * Step (2): compares each digest reply with the digest of the local row. On the first
     * mismatch it issues a full-data read to the remote replicas, handled by a DataRepairHandler.
     */
    class DigestResponseHandler implements IAsyncCallback
    {
        // set once the full-data round has been sent; later digest replies are then ignored
        private boolean repairInvoked;

        // digest of the local row's column family, computed when the handler is created
        private final ByteBuffer localDigest = ColumnFamily.digest(row_.cf);

        /**
         * Deserializes a digest reply and, if it differs from the local digest, starts the
         * full-data round. Synchronized so that the repairInvoked check-and-set is atomic.
         *
         * @throws RuntimeException wrapping any failure while handling the reply
         */
        public synchronized void response(Message response)
        {
            if (repairInvoked)
                return;

            try
            {
                byte[] body = response.getMessageBody();
                ByteArrayInputStream bufIn = new ByteArrayInputStream(body);
                ReadResponse result = ReadResponse.serializer().deserialize(new DataInputStream(bufIn));
                ByteBuffer digest = result.digest();

                if (!localDigest.equals(digest))
                {
                    ReadResponseResolver readResponseResolver = new ReadResponseResolver(table_);
                    IAsyncCallback responseHandler = new DataRepairHandler(row_, replicas_.size(), readResponseResolver);

                    ReadCommand dataCommand = constructReadMessage(false);
                    Message message = dataCommand.makeReadMessage();
                    if (logger_.isDebugEnabled())
                        logger_.debug("Digest mismatch; re-reading " + readCommand_.key + " from " + message.getMessageId() + "@[" + StringUtils.join(replicas_, ", ") + "]");
                    MessagingService.instance.addCallback(responseHandler, message.getMessageId());
                    sendToRemoteReplicas(message);

                    // only flagged after the sends completed, so a failure above leaves the handler able to retry
                    repairInvoked = true;
                }
            }
            catch (Exception e)
            {
                throw new RuntimeException("Error handling responses for " + row_, e);
            }
        }
    }

    /**
     * Step (3): collects full-data replies and, once a majority (counting the local row) has
     * been seen, schedules resolution after the rpc timeout.
     */
    static class DataRepairHandler implements IAsyncCallback
    {
        private final Collection<Message> responses_ = new LinkedBlockingQueue<Message>();
        private final ReadResponseResolver readResponseResolver_;
        private final int majority_;

        /**
         * @param localRow             the locally read row, injected as the first response
         * @param responseCount        number of replicas; the majority threshold is responseCount / 2 + 1
         * @param readResponseResolver resolver that preprocesses and finally resolves the responses
         */
        public DataRepairHandler(Row localRow, int responseCount, ReadResponseResolver readResponseResolver) throws IOException
        {
            readResponseResolver_ = readResponseResolver;
            majority_ = (responseCount / 2) + 1;

            // wrap localRow in a response Message so it doesn't need to be special-cased in the resolver
            ReadResponse readResponse = new ReadResponse(localRow);
            Message fakeMessage = new Message(FBUtilities.getLocalAddress(), StorageService.Verb.INTERNAL_RESPONSE, ArrayUtils.EMPTY_BYTE_ARRAY);
            responses_.add(fakeMessage);
            readResponseResolver_.injectPreProcessed(fakeMessage, readResponse);
        }

        /**
         * Records and preprocesses one full-data reply. When the number of collected responses
         * reaches the majority exactly, resolution is scheduled once.
         */
        // synchronized so the " == majority" is safe
        public synchronized void response(Message message)
        {
            if (logger_.isDebugEnabled())
                logger_.debug("Received response in DataRepairHandler : " + message.toString());
            responses_.add(message);
            readResponseResolver_.preprocess(message);

            if (responses_.size() == majority_)
            {
                Runnable resolveTask = new WrappedRunnable()
                {
                    public void runMayThrow() throws IOException, DigestMismatchException
                    {
                        readResponseResolver_.resolve(responses_);
                    }
                };
                // give remaining replicas until timeout to reply and get added to responses_
                // NOTE(review): the future returned by schedule() is discarded, so a failure inside
                // resolve() is only visible if WrappedRunnable or the executor reports it -- confirm.
                executor_.schedule(resolveTask, DatabaseDescriptor.getRpcTimeout(), TimeUnit.MILLISECONDS);
            }
        }
    }
}