/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.service;
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReferenceArray;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.ColumnFamily;
import org.apache.cassandra.db.ReadResponse;
import org.apache.cassandra.db.Row;
import org.apache.cassandra.net.IAsyncCallback;
import org.apache.cassandra.net.Message;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.Pair;
import org.apache.log4j.Logger;
/**
 * Callback that collects read responses from several endpoints (the local node and/or
 * remote nodes) and lets a caller proceed as soon as {@code responseCount} of them have
 * arrived, while still allowing a later consistency check / read repair pass over every
 * response received.
 *
 * <p>Each arriving response is stored in the first free slot of {@link #responses} and then
 * releases one permit on {@link #condition}; waiters acquire permits to learn how many
 * responses are available.
 *
 * <p>NOTE(review): {@link #run()} / {@link #consistencyCheck()} are presumably executed on a
 * different thread than {@link #get()} (the original comments mention a "consistency stage")
 * - confirm against the caller.
 */
public class ParallelQuorumResponseHandler implements IAsyncCallback, Runnable, ILocalCallback
{
    protected static final Logger logger = Logger.getLogger( ParallelQuorumResponseHandler.class );

    /** Counts responses that have been stored but not yet consumed by a waiter. Starts with zero permits. */
    protected final Semaphore condition;

    /** Received responses, filled from index 0 upwards; a {@code null} slot marks the end of the received ones. */
    protected final AtomicReferenceArray<Pair<InetAddress, ReadResponse>> responses;

    private final SimpleReadResponseResolver responseResolver;

    /** Wall-clock time (ms) at construction; the rpc timeout is measured from this instant. */
    private final long startTime;

    /** Number of responses {@link #get()} waits for before resolving. */
    private final int responseCount;

    /**
     * Superset computed by {@link #get()} when every endpoint had already answered, or {@code null}.
     * Volatile because it is written while serving {@link #get()} and read by {@link #consistencyCheck()},
     * which may run on another thread.
     */
    private volatile ColumnFamily resolvedSuperset;

    /**
     * @param endpointCount    number of endpoints a response is expected from (capacity of {@link #responses})
     * @param responseCount    number of responses {@link #get()} must wait for
     * @param responseResolver resolver used to parse and merge responses and to schedule repairs
     */
    public ParallelQuorumResponseHandler(int endpointCount, int responseCount, SimpleReadResponseResolver responseResolver)
    {
        this.responseCount = responseCount;
        // No response has arrived yet, so no permit is available.
        this.condition = new Semaphore(0);
        this.responses = new AtomicReferenceArray<Pair<InetAddress, ReadResponse>>(endpointCount);
        this.responseResolver = responseResolver;
        this.startTime = System.currentTimeMillis();
    }

    /**
     * Waits until {@code responseCount} responses (local or remote) have arrived and returns the
     * row resolved from all responses received so far. No repairs are sent from here.
     *
     * @return the resolved row
     * @throws TimeoutException if the required responses did not arrive within the rpc timeout,
     *                          measured from the construction of this handler
     * @throws IOException      declared for callers; not thrown by the code visible in this class
     */
    public Row get() throws TimeoutException, IOException
    {
        if (!awaitResponses(responseCount))
        {
            throw new TimeoutException("Parallel read operation timed out .");
        }
        return resolve();
    }

    /* (non-Javadoc)
     * @see java.lang.Runnable#run()
     */
    @Override
    public void run()
    {
        consistencyCheck();
    }

    /**
     * Tries to acquire {@code permits} permits within what is left of the rpc timeout.
     * A non-positive remaining timeout makes {@link Semaphore#tryAcquire(int, long, TimeUnit)}
     * return without waiting.
     *
     * @param permits number of responses to wait for
     * @return {@code true} if the permits were acquired, {@code false} on timeout
     * @throws AssertionError if the waiting thread is interrupted; the interrupt status is
     *                        restored first so code further up the stack can still observe it
     */
    private boolean awaitResponses(int permits)
    {
        long timeout = DatabaseDescriptor.getRpcTimeout() - (System.currentTimeMillis() - startTime);
        try
        {
            return condition.tryAcquire(permits, timeout, TimeUnit.MILLISECONDS);
        }
        catch (InterruptedException ex)
        {
            Thread.currentThread().interrupt();
            throw new AssertionError(ex);
        }
    }

    /**
     * Resolves the superset of all responses received so far WITHOUT sending out repairs.
     *
     * @return the row produced by the resolver
     */
    private Row resolve()
    {
        ArrayList<ColumnFamily> versions = new ArrayList<ColumnFamily>(responses.length());
        for (int i = 0; i < responses.length(); i++)
        {
            Pair<InetAddress, ReadResponse> pair = responses.get(i);
            if (pair == null)
                break;
            versions.add(pair.right.row().cf);
        }

        Row row = responseResolver.resolve(versions);
        if (versions.size() == responses.length())
        {
            // We have got ALL responses - cache the result of superset resolution so the
            // consistency stage does not have to resolve the superset again.
            // This happens in 2-3% of cases when all nodes are up, and 100% when one of the nodes is down (RF=3).
            this.resolvedSuperset = row.cf;
        }
        return row;
    }

    /**
     * Waits until all requested endpoints have responded (or the rpc timeout expires) and does
     * read repair, if necessary. When {@link #get()} already resolved a superset from the complete
     * set of responses, that superset is reused and no waiting takes place.
     */
    public void consistencyCheck()
    {
        // Read the shared field once so that both decisions below are based on the same value.
        final ColumnFamily superset = this.resolvedSuperset;

        if (superset == null)
        {
            // responseCount permits are already acquired by get(). We hit here only after successful get()
            if (awaitResponses(responses.length() - responseCount))
                StorageProxy.countStrongConsistencyAll();
            else
                StorageProxy.countStrongConsistencyUnder();
        }

        ArrayList<InetAddress> endpoints = new ArrayList<InetAddress>(responses.length());
        ArrayList<ColumnFamily> versions = new ArrayList<ColumnFamily>(responses.length());
        for (int i = 0; i < responses.length(); i++)
        {
            Pair<InetAddress, ReadResponse> pair = responses.get(i);
            if (pair == null)
                break;
            endpoints.add(pair.left);
            versions.add(pair.right.row().cf);
        }

        if (superset == null)
        {
            // resolving and submitting repair for all responses we got so far
            responseResolver.resolve(versions, endpoints);
        }
        else
        {
            // only submitting repairs reusing precomputed superset
            responseResolver.maybeScheduleRepairs(superset, versions, endpoints);
            StorageProxy.countStrongConsistencyReuseSuperset();
        }
    }

    /**
     * Stores a response in the first free slot of {@link #responses}.
     *
     * @param response the endpoint/response pair to store
     * @return index of the slot used (0 is the very first response), or -1 when every slot is
     *         already taken and assertions are disabled
     */
    private int addResponse(Pair<InetAddress,ReadResponse> response)
    {
        for (int i = 0; i < responses.length(); i++)
        {
            // CAS against null claims the slot atomically, so concurrent callers never share one.
            if (responses.compareAndSet(i, null, response))
                return i;
        }
        assert false : "All messages already arrived: "+responses+", message: "+response;
        return -1;
    }

    /* (non-Javadoc)
     * @see org.apache.cassandra.service.ILocalCallback#localResponse(org.apache.cassandra.db.Row)
     */
    @Override
    public void localResponse(Row data)
    {
        ReadResponse readResponse = new ReadResponse(data);
        addResponse(new Pair<InetAddress, ReadResponse>(FBUtilities.getLocalAddress(), readResponse ));
        // Release only after the response is stored, so a woken waiter is guaranteed to find it.
        condition.release();
    }

    /**
     * Records a response received from another endpoint and wakes up waiters.
     *
     * @param message the reply message; its body is parsed by the response resolver
     * @throws RuntimeException wrapping the {@link IOException} if the message cannot be parsed;
     *                          in that case nothing is stored and no permit is released
     */
    public void response(Message message)
    {
        try
        {
            ReadResponse data = responseResolver.parseResponse(message);
            addResponse(new Pair<InetAddress, ReadResponse>(message.getFrom(), data));
            // Release only after the response is stored, so a woken waiter is guaranteed to find it.
            condition.release();
        }
        catch (IOException e)
        {
            throw new RuntimeException(e);
        }
    }
}