package edu.berkeley.thebes.hat.client.clustering; import org.apache.thrift.TException; import org.apache.thrift.async.AsyncMethodCallback; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Queues; import com.google.common.util.concurrent.Uninterruptibles; import edu.berkeley.thebes.common.clustering.RoutingHash; import edu.berkeley.thebes.common.config.Config; import edu.berkeley.thebes.common.config.ConfigParameterTypes.RoutingMode; import edu.berkeley.thebes.common.data.DataItem; import edu.berkeley.thebes.common.data.Version; import edu.berkeley.thebes.common.thrift.ServerAddress; import edu.berkeley.thebes.common.thrift.ThriftDataItem; import edu.berkeley.thebes.hat.common.thrift.ReplicaService; import edu.berkeley.thebes.hat.common.thrift.ReplicaService.AsyncClient.put_call; import edu.berkeley.thebes.hat.common.thrift.ReplicaService.Client; import edu.berkeley.thebes.hat.common.thrift.ReplicaService.AsyncClient.get_call; import edu.berkeley.thebes.hat.common.thrift.ThriftUtil; import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Random; import java.util.SortedSet; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; public class QuorumReplicaRouter extends ReplicaRouter { private static Logger logger = LoggerFactory.getLogger(QuorumReplicaRouter.class); private final Map<Integer, List<ServerAddress>> replicaAddressesByCluster; private final int numClusters; private final int numNeighbors; private final int quorum; private final Map<ServerAddress, ReplicaClient> replicaRequestQueues = Maps.newHashMap(); private class ReplicaClient { private Client client; private AtomicBoolean inUse; BlockingQueue<Request<?>> requestBlockingQueue; public ReplicaClient(Client client) { this.client = client; this.inUse = new AtomicBoolean(false); requestBlockingQueue = Queues.newLinkedBlockingQueue(); new Thread(new Runnable() { @Override public void run() { while(true) { Request<?> request = Uninterruptibles.takeUninterruptibly(requestBlockingQueue); request.process(ReplicaClient.this); } } }).start(); } public boolean executeRequest(Request<?> request) { if(!inUse.getAndSet(true)) { requestBlockingQueue.add(request); return true; } return false; } } public QuorumReplicaRouter() throws TTransportException, IOException { assert(Config.getRoutingMode() == RoutingMode.QUORUM); this.replicaAddressesByCluster = Maps.newHashMap(); this.numClusters = Config.getNumClusters(); this.numNeighbors = Config.getServersInCluster().size(); this.quorum = (int) Math.ceil((numNeighbors+1)/2); assert(this.quorum <= this.numNeighbors); logger.debug("quorum is set to "+this.quorum); for (int i = 0; i < numClusters; i ++) { List<ServerAddress> neighbors = Config.getServersInCluster(i+1); for (ServerAddress neighbor : neighbors) { logger.debug("Connecting to " + neighbor); replicaRequestQueues.put(neighbor, new ReplicaClient( ThriftUtil.getReplicaServiceSyncClient(neighbor.getIP(), neighbor.getPort()))); } replicaAddressesByCluster.put(i+1, neighbors); } } @Override public boolean put(String key, DataItem value) throws TException { return performRequest(key, new WriteRequest(key, value)); } @Override public ThriftDataItem get(String key, Version requiredVersion) throws TException { return performRequest(key, new ReadRequest(key, requiredVersion)); } /** Performs the request by queueing N requests and waiting for Q responses. */ public <E> E performRequest(String key, Request<E> request) { int numSent = 0; int numAttempted = 0; int replicaIndex = RoutingHash.hashKey(key, numNeighbors); for (List<ServerAddress> replicasInCluster : replicaAddressesByCluster.values()) { ServerAddress replicaAddress = replicasInCluster.get(replicaIndex); ReplicaClient replica = replicaRequestQueues.get(replicaAddress); numAttempted++; if(replica.executeRequest(request)) numSent++; } assert numSent >= quorum; if(numSent < quorum) logger.warn(String.format("attempted %d, sent %d, need %d", numAttempted, numSent, quorum)); logger.trace("Waiting for response"); E ret = request.getResponseWhenReady(); logger.trace("Got response"); return ret; } private abstract class Request<E> { private BlockingQueue<E> responseChannel; private Semaphore responseSemaphore; AtomicInteger numResponses; private Request() { this.responseChannel = Queues.newLinkedBlockingQueue(); responseSemaphore = new Semaphore(0); numResponses = new AtomicInteger(0); } abstract public void process(ReplicaClient client); protected void notifyResponse(E response) { responseChannel.add(response); responseSemaphore.release(); } public E getResponseWhenReady() { responseSemaphore.acquireUninterruptibly(quorum); return Uninterruptibles.takeUninterruptibly(responseChannel); } } private class WriteRequest extends Request<Boolean> { private String key; private ThriftDataItem value; public WriteRequest(String key, DataItem value) { this.key = key; this.value = value.toThrift(); } public void process(ReplicaClient replica) { try { replica.client.put(key, value); replica.inUse.set(false); notifyResponse(true); } catch (TException e) { logger.error("Error: ", e); replica.inUse.set(false); notifyResponse(false); } } } private class ReadRequest extends Request<ThriftDataItem> { private String key; private Version requiredVersion; public ReadRequest(String key, Version requiredVersion) { this.key = key; this.requiredVersion = requiredVersion != null ? requiredVersion : Version.NULL_VERSION; } public void process(ReplicaClient replica) { try { ThriftDataItem resp = replica.client.get(key, requiredVersion.getThriftVersion()); replica.inUse.set(false); notifyResponse(resp); } catch (TException e) { logger.error("Exception:", e); if (numResponses.incrementAndGet() >= quorum) { replica.inUse.set(false); notifyResponse(new ThriftDataItem()); // "null" } } } } }