package me.prettyprint.cassandra.locking; import static me.prettyprint.hector.api.factory.HFactory.createColumn; import static me.prettyprint.hector.api.factory.HFactory.createMutator; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import me.prettyprint.cassandra.serializers.StringSerializer; import me.prettyprint.hector.api.Cluster; import me.prettyprint.hector.api.beans.ColumnSlice; import me.prettyprint.hector.api.beans.HColumn; import me.prettyprint.hector.api.factory.HFactory; import me.prettyprint.hector.api.locking.HLock; import me.prettyprint.hector.api.locking.HLockManagerConfigurator; import me.prettyprint.hector.api.locking.HLockTimeoutException; import me.prettyprint.hector.api.mutation.Mutator; import me.prettyprint.hector.api.query.QueryResult; import me.prettyprint.hector.api.query.SliceQuery; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Joiner; import com.google.common.collect.Lists; import com.google.common.collect.Maps; /** * Wait Chain implementation created by Dominic Williams, reviewed by Aaron * Morton and Patricio Echague. * * @author patricioe (Patricio Echague - patricioe@gmail.com) * @author tnine (Todd Nine) * */ public class HLockManagerImpl extends AbstractLockManager { private static final Logger logger = LoggerFactory.getLogger(HLockManagerImpl.class); private ScheduledExecutorService scheduler; private long lockTtl = 5000; private int colTtl = 5; private int maxSelectSize = 10; public HLockManagerImpl(Cluster cluster, HLockManagerConfigurator hlc) { super(cluster, hlc); scheduler = Executors.newScheduledThreadPool(lockManagerConfigurator.getNumberOfLockObserverThreads()); lockTtl = lockManagerConfigurator.getLocksTTLInMillis(); colTtl = (int) (lockTtl / 1000); maxSelectSize = hlc.getMaxSelectSize(); } /* * (non-Javadoc) * * @see * me.prettyprint.hector.api.locking.HLockManager#acquire(me.prettyprint.hector * .api.locking.HLock) */ @Override public void acquire(HLock lock) { acquire(lock, Long.MAX_VALUE - System.currentTimeMillis() - 10000); } /** * {@inheritDoc} */ @Override public void acquire(HLock lock, long timeout) { verifyPrecondition(lock); // Generate the internal lock id (CLID) maybeSetInternalLockId(lock); writeLock(lock); // Pairs of type <LockId, CommandSeparatedSeenLockIds> Map<String, String> canBeEarlier = readExistingLocks(lock); String nextWaitingClientId = null; long waitStart = System.currentTimeMillis(); while (true) { // If it is just me... if (canBeEarlier.size() <= 1) { setAcquired(lock, canBeEarlier); return; } // We can't get the lock, and we've timed out, give up if (waitStart + timeout < System.currentTimeMillis()) { deleteLock(lock); throw new HLockTimeoutException(String.format("Unable to get lock before %d ", waitStart + timeout)); } boolean recv_all_acks = true; // Let's see of other nodes know me for (Entry<String, String> otherLock : canBeEarlier.entrySet()) { if (!lock.getLockId().equals(otherLock.getKey()) && !hasThisLockSeenMe(otherLock.getValue(), lock.getLockId())) { recv_all_acks = false; break; } } List<String> canBeEarlierSortedList = null; // If everyone acknowledged to have seen me then ... if (recv_all_acks) { canBeEarlierSortedList = Lists.newArrayList(canBeEarlier.keySet()); // sort them Collections.sort(canBeEarlierSortedList); nextWaitingClientId = canBeEarlierSortedList.get(0); // check if we are the first ones if (nextWaitingClientId.equals(lock.getLockId())) { break; } } // Let everyone know what I have already seen writeLock(lock, canBeEarlier.keySet()); smartWait(lockManagerConfigurator.getBackOffRetryDelayInMillis()); // Refresh the list, but only read locks we read at our initial acquire // for optimization canBeEarlier = readExistingLocks(lock); } if (logger.isDebugEnabled()) { logLock(lock, canBeEarlier.keySet()); } setAcquired(lock, canBeEarlier); } /** * Start the heartbeat thread before we return * * @param lock */ private void setAcquired(HLock lock, Map<String, String> canBeEarlier) { // start the heartbeat Future<Void> heartbeat = scheduler.schedule(new Heartbeat(lock), lockTtl / 2, TimeUnit.MILLISECONDS); ((HLockImpl) lock).setHeartbeat(heartbeat); ((HLockImpl) lock).setAcquired(true); if (logger.isDebugEnabled()) { logLock(lock, canBeEarlier.keySet()); } } private static void logLock(HLock lock, Set<String> earlier) { List<String> canBeEarlierSortedList = Lists.newArrayList(earlier); // sort them Collections.sort(canBeEarlierSortedList); String peers = Joiner.on(", ").join(canBeEarlierSortedList); logger.debug("{} acquired lock. Peers are {}", lock, peers); } /** * Here for testing purposes only, this should never really be invoked */ public void shutdownScheduler() { scheduler.shutdownNow(); } private void smartWait(long sleepTime) { try { Thread.sleep((sleepTime + (long) (Math.random() * sleepTime))); } catch (InterruptedException e) { // throw new RuntimeException(); // swallow, we woke up early, not worth re-throwing an exception logger.warn("Interrupted while waiting", e); } } private boolean hasThisLockSeenMe(String commaSeparatedLockIds, String myLockId) { String[] seenLocksIds = commaSeparatedLockIds.split(","); for (int i = 0; i < seenLocksIds.length; i++) { if (seenLocksIds[i].equals(myLockId)) return true; } return false; } /** * Fill up the lock id info if it does not exist. * * @param lock * the lock object to fill up with a new generated lock id for this * client/thread */ private void maybeSetInternalLockId(HLock lock) { if (lock.getLockId() == null) { lock.setLockId(generateLockId()); } } @Override public void release(HLock lock) { verifyPrecondition(lock); deleteLock(lock); ((HLockImpl) lock).setAcquired(false); } /** * Generates a CLID (Client Lock ID) * */ private String generateLockId() { return UUID.randomUUID().toString(); } private void verifyPrecondition(HLock lock) { assert lock != null; if (lock.getPath() == null) throw new RuntimeException("Lock path cannot be null"); } private void writeLock(HLock lock) { writeLock(lock, lock.getLockId().toString()); } private void writeLock(HLock lock, Set<String> keySet) { String seenLockIds = Joiner.on(",").join(keySet); writeLock(lock, seenLockIds); } private void writeLock(HLock lock, String seenLockIds) { Mutator<String> mutator = createMutator(keyspace, StringSerializer.get()); mutator.addInsertion(lock.getPath(), lockManagerConfigurator.getLockManagerCF(), createColumnForLock(lock.getLockId(), seenLockIds)); mutator.execute(); } private void deleteLock(HLock lock) { // cancel the heartbeat task if it exists Future<Void> heartbeat = ((HLockImpl) lock).getHeartbeat(); if (heartbeat != null) { heartbeat.cancel(false); } Mutator<String> mutator = createMutator(keyspace, StringSerializer.get()); mutator.addDeletion(lock.getPath(), lockManagerConfigurator.getLockManagerCF(), lock.getLockId(), StringSerializer.get(), keyspace.createClock()); mutator.execute(); } /** * Reads all existing locks for this lock path * * @param lockPath * a lock path * @return a list of locks waiting on this lockpath */ private Map<String, String> readExistingLocks(HLock lock) { // logger.debug("Started reading all columns"); SliceQuery<String, String, String> sliceQuery = HFactory .createSliceQuery(keyspace, StringSerializer.get(), StringSerializer.get(), StringSerializer.get()) .setColumnFamily(lockManagerConfigurator.getLockManagerCF()).setKey(lock.getPath()); //we only care about the first 2 locks, anything else is simply queued. Select 10 just to be safe if the clients aren't ordered properly sliceQuery.setRange(null, null, false, maxSelectSize); QueryResult<ColumnSlice<String, String>> queryResult = sliceQuery.execute(); // logger.debug("Finished reading all columns"); return getResults(queryResult); } /** * Reads all existing locks for this lock path * * @param lockPath * a lock path * @return a list of locks waiting on this lockpath */ private Map<String, String> readExistingLocks(HLock lock, String lockName) { // logger.debug("Started reading existing columns"); SliceQuery<String, String, String> sliceQuery = HFactory .createSliceQuery(keyspace, StringSerializer.get(), StringSerializer.get(), StringSerializer.get()) .setColumnFamily(lockManagerConfigurator.getLockManagerCF()).setKey(lock.getPath()); sliceQuery.setColumnNames(lockName); QueryResult<ColumnSlice<String, String>> queryResult = sliceQuery.execute(); // logger.debug("Finished reading existing columns"); return getResults(queryResult); } private Map<String, String> getResults(QueryResult<ColumnSlice<String, String>> queryResult) { Map<String, String> result = Maps.newHashMap(); for (HColumn<String, String> col : queryResult.get().getColumns()) { result.put(col.getName(), col.getValue()); } return result; } private HColumn<String, String> createColumnForLock(String name, String value) { return createColumn(name, value, keyspace.createClock(), colTtl, StringSerializer.get(), StringSerializer.get()); } @Override public HLock createLock(String lockPath) { return new HLockImpl(lockPath, generateLockId()); } /** * Simple scheduled class to write heart beats to the column families. This * heart beat should be used to signal we're still waiting for a lock * * @author tnine * */ private class Heartbeat implements Callable<Void> { private HLock lock; private Heartbeat(HLock lock) { this.lock = lock; } /* * (non-Javadoc) * * @see java.util.concurrent.Callable#call() */ @Override public Void call() throws Exception { logger.debug("{} heartbeat", lock); /** * We check that we still exist in Cassandra, then re write our state for * 2 reasons. * * Cassandra is the authoritative system for locking * * If there is lag and another client appears before us in the list after * we acquire the lock, we never want to acknowledge it. We simply keep * writing the state we had when we acquired the lock originally. This * ensures that we never get a race condition on initial lock due to clock * drift or column ordering in Cassandra */ Map<String, String> existing = readExistingLocks(lock, lock.getLockId()); String values = existing.get(lock.getLockId()); if (values == null) { logger.debug("{} lock has been removed from cassandra. Short circuiting", lock); return null; } writeLock(lock, values); scheduler.schedule(this, lockTtl / 2, TimeUnit.MILLISECONDS); return null; } } }