package org.gbif.occurrence.persistence.keygen;
import org.gbif.hbase.util.ResultReader;
import org.gbif.occurrence.common.config.OccHBaseConfiguration;
import org.gbif.occurrence.persistence.api.KeyLookupResult;
import org.gbif.occurrence.persistence.hbase.Columns;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.Maps;
import com.google.inject.Inject;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Meter;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * An extension of AbstractHBaseKeyPersistenceService with a generateKey implementation that uses an HBase
 * implementation of the algorithm described at
 * <a href="http://dev.gbif.org/code/snippet/CR-OCC-5">http://dev.gbif.org/code/snippet/CR-OCC-5</a>.
 * <p>
 * Each lookup key is a row in the lookup table (schema: lookupKey | status | lock | key). To assign a key, a caller
 * first writes its own lock id into the lock column of every lookup row that is not yet ALLOCATED, then writes the
 * key and the ALLOCATED status to those rows, and finally deletes its locks. If any lock cannot be taken, all locks
 * taken so far are released and the whole attempt is repeated after a randomised wait.
 */
public class HBaseLockingKeyService extends AbstractHBaseKeyPersistenceService {

  private static final Logger LOG = LoggerFactory.getLogger(HBaseLockingKeyService.class);

  // base wait between attempts; the actual wait is this value plus/minus up to WAIT_SKEW milliseconds
  private static final long WAIT_BEFORE_RETRY_MS = 5000;
  private static final int WAIT_SKEW = 4000;
  // a lock whose timestamp is older than this (in milliseconds) is treated as abandoned and may be taken over
  private static final long STALE_LOCK_TIME = 5 * 60 * 1000;

  public static final int COUNTER_ROW = 1;

  // The number of IDs to reserve at a time in batch
  private static final int BATCHED_ID_SIZE = 100;

  // shared source of jitter for the retry wait (java.util.Random is safe for concurrent use)
  private static final Random RETRY_JITTER = new Random();

  // the next available key to allocate
  private int currentKey;
  // our reserved upper key limit for the current batch
  private int maxReservedKeyInclusive;

  private final Meter reattempts = Metrics.newMeter(HBaseLockingKeyService.class, "reattempts", "reattempts",
    TimeUnit.SECONDS);

  @Inject
  public HBaseLockingKeyService(OccHBaseConfiguration cfg, Connection connection) {
    super(cfg, connection, new OccurrenceKeyBuilder());
  }

  /**
   * Finds the key already allocated to the given unique strings, or allocates a new one, retrying for as long as
   * another writer holds a lock on any of the lookup rows.
   * <p>
   * The retry is a loop rather than recursion so that long contention cannot grow the call stack. An interrupt
   * received while waiting does not abort the call (it never did), but the thread's interrupt status is restored
   * before this method returns or throws, so callers can still observe it.
   *
   * @param uniqueStrings the identifiers from which the lookup keys are built
   * @param scope the scope passed to the key builder together with the unique strings
   * @return the lookup result holding the key, flagged as created when no existing key was found
   */
  @Override
  public KeyLookupResult generateKey(Set<String> uniqueStrings, String scope) {
    boolean interrupted = false;
    try {
      while (true) {
        KeyLookupResult lookupResult = attemptGenerateKey(uniqueStrings, scope);
        if (lookupResult != null) {
          return lookupResult;
        }
        try {
          TimeUnit.MILLISECONDS.sleep(
            WAIT_BEFORE_RETRY_MS + RETRY_JITTER.nextInt(WAIT_SKEW) - RETRY_JITTER.nextInt(WAIT_SKEW));
        } catch (InterruptedException e) {
          // keep retrying, but remember the interrupt so it can be re-asserted on the way out
          interrupted = true;
        }
      }
    } finally {
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Makes a single attempt at locking all lookup rows and assigning the key.
   * <p>
   * Locks taken by this attempt are always released before it ends: after success, after losing a lock race, and
   * also when an exception is thrown part way through (otherwise other writers would be blocked until the lock
   * becomes stale).
   *
   * @return the lookup result, or null if a lock could not be obtained and the caller should try again
   */
  private KeyLookupResult attemptGenerateKey(Set<String> uniqueStrings, String scope) {
    Map<String, KeyStatus> statusMap = Maps.newTreeMap(); // required: predictable sorting for e.g. testing
    try {
      Map<String, Integer> existingKeyMap = Maps.newTreeMap(); // required: predictable sorting for e.g. testing
      byte[] lockId = Bytes.toBytes(UUID.randomUUID().toString());

      // lookupTable schema: lookupKey | status | lock | key

      // all of our locks will have the same timestamp
      long now = System.currentTimeMillis();

      Set<String> lookupKeys = keyBuilder.buildKeys(uniqueStrings, scope);
      boolean failed = false;
      Integer foundKey = null;
      for (String lookupKey : lookupKeys) {
        Result row = lookupTableStore.getRow(lookupKey);
        LOG.debug("Lookup for [{}] produced [{}]", lookupKey, row);
        KeyStatus status = null;
        byte[] existingLock = null;
        Integer existingKey = null;
        if (row != null) {
          String rawStatus = ResultReader.getString(row, Columns.OCCURRENCE_COLUMN_FAMILY,
            Columns.LOOKUP_STATUS_COLUMN, null);
          if (rawStatus != null) {
            status = KeyStatus.valueOf(rawStatus);
          }
          existingLock = ResultReader.getBytes(row, Columns.OCCURRENCE_COLUMN_FAMILY,
            Columns.LOOKUP_LOCK_COLUMN, null);
          existingKey = ResultReader.getInteger(row, Columns.OCCURRENCE_COLUMN_FAMILY,
            Columns.LOOKUP_KEY_COLUMN, null);
          LOG.debug("Got existing status [{}] existingLock [{}] key [{}]", status, existingLock, existingKey);
        }

        if (status == KeyStatus.ALLOCATED) {
          LOG.debug("Status ALLOCATED, using found key [{}]", existingKey);
          // even if existingLock is != null, ALLOCATED means the key exists and is final
          statusMap.put(lookupKey, KeyStatus.ALLOCATED);
          existingKeyMap.put(lookupKey, existingKey);
          if (foundKey == null) {
            foundKey = existingKey;
          } else {
            // we've found conflicting keys for our lookupKeys - this is fatal
            if (foundKey.intValue() != existingKey.intValue()) {
              failWithConflictingLookup(existingKeyMap);
            }
          }
          LOG.debug("Status ALLOCATED, using found key [{}]", foundKey);
        } else if (existingLock == null) {
          // lock is ours for the taking - checkAndPut lockId, expecting null for lockId
          boolean gotLock = lookupTableStore.checkAndPut(lookupKey, Columns.LOOKUP_LOCK_COLUMN, lockId,
            Columns.LOOKUP_LOCK_COLUMN, null, now);
          if (gotLock) {
            statusMap.put(lookupKey, KeyStatus.ALLOCATING);
            LOG.debug("Grabbed free lock, now ALLOCATING [{}]", lookupKey);
          } else {
            failed = true;
            LOG.debug("Failed to grab free lock for [{}], breaking", lookupKey);
            break;
          }
        } else {
          // somebody has written their lockId and so has the lock, but they haven't finished yet
          // (status != ALLOCATED)
          Long existingLockTs = ResultReader.getTimestamp(row, Columns.OCCURRENCE_COLUMN_FAMILY,
            Columns.LOOKUP_LOCK_COLUMN);
          if (now - existingLockTs > STALE_LOCK_TIME) {
            LOG.debug("Found stale lock for [{}]", lookupKey);
            // Someone died before releasing lock.
            // Note that key could be not null here - this means that thread had the lock, wrote the key, but then
            // died before releasing lock.
            // checkandPut our lockId, expecting lock to match the existing lock
            boolean gotLock = lookupTableStore.checkAndPut(lookupKey, Columns.LOOKUP_LOCK_COLUMN,
              lockId, Columns.LOOKUP_LOCK_COLUMN, existingLock, now);
            if (gotLock) {
              statusMap.put(lookupKey, KeyStatus.ALLOCATING);
              LOG.debug("Reset stale lock, now ALLOCATING [{}]", lookupKey);
            } else {
              // someone beat us to this lock, in one of two ways
              // 1) they grabbed lock, wrote new id, and released lock, so now status is ALLOCATED and id is final
              // 2) they grabbed lock so status is a newer lock uuid with recent timestamp
              // in either case we're toast - abort and try again
              failed = true;
              LOG.debug("Failed to reset stale lock for [{}], breaking", lookupKey);
              break;
            }
          } else {
            // someone has a current lock, we need to give up and try again
            failed = true;
            LOG.debug("Hit valid, current lock for [{}], breaking", lookupKey);
            break;
          }
        }
      }

      if (failed) {
        LOG.debug("Failed to get lock. Releasing held locks and trying again.");
        reattempts.mark();
        releaseLocks(statusMap);
        return null;
      }

      // now we have map of every lookupKey to either ALLOCATED or ALLOCATING, and locks on all ALLOCATING
      Integer key;
      KeyLookupResult lookupResult;
      if (foundKey == null) {
        key = getNextKey();
        lookupResult = new KeyLookupResult(key, true);
        LOG.debug("Now assigning new key [{}]", key);
      } else {
        key = foundKey;
        lookupResult = new KeyLookupResult(key, false);
        LOG.debug("Using found key [{}]", key);
      }

      // write the key and update status to ALLOCATED
      for (Map.Entry<String, KeyStatus> entry : statusMap.entrySet()) {
        if (entry.getValue() == KeyStatus.ALLOCATING) {
          // TODO: combine into one put
          lookupTableStore.putInt(entry.getKey(), Columns.LOOKUP_KEY_COLUMN, key);
          lookupTableStore.putString(entry.getKey(), Columns.LOOKUP_STATUS_COLUMN, KeyStatus.ALLOCATED.toString());
        }
      }

      releaseLocks(statusMap);

      LOG.debug("<< generateKey (generated? [{}] key [{}])", !key.equals(foundKey), key);
      return lookupResult;
    } catch (RuntimeException e) {
      // don't leave our locks behind for STALE_LOCK_TIME; the original failure is what the caller must see, so a
      // secondary failure while releasing is only logged
      try {
        releaseLocks(statusMap);
      } catch (RuntimeException releaseFailure) {
        LOG.warn("Failed to release locks after an error in generateKey", releaseFailure);
      }
      throw e;
    }
  }

  /**
   * Provides the next available key. Because throughput of an incrementColumnValue is limited by HBase to a few
   * thousand calls per second, this implementation reserves a batch of IDs at a time, and then allocates them to
   * the calling threads, until they are exhausted, when it will go and reserve another batch. Failure scenarios
   * will therefore mean IDs go unused. This is expected to be a rare scenario and therefore acceptable.
   *
   * @return the next key
   * @throws IllegalStateException if the counter has moved beyond what fits in an int
   */
  private synchronized int getNextKey() {
    // if we have exhausted our reserved keys, get a new batch of them
    if (currentKey == maxReservedKeyInclusive) {
      // get batch
      Long longKey = counterTableStore.incrementColumnValue(COUNTER_ROW, Columns.COUNTER_COLUMN, BATCHED_ID_SIZE);
      if (longKey > Integer.MAX_VALUE) {
        throw new IllegalStateException("HBase issuing keys larger than Integer can support");
      }
      maxReservedKeyInclusive = longKey.intValue();
      // safer to calculate our guaranteed safe range than rely on what nextKey was set to
      currentKey = maxReservedKeyInclusive - BATCHED_ID_SIZE;
    }
    currentKey++;
    return currentKey;
  }

  /**
   * Deletes the lock column of every lookup row that this attempt marked as ALLOCATING, i.e. every row on which it
   * took a lock. Rows recorded as ALLOCATED were never locked by this attempt and are left alone.
   */
  private void releaseLocks(Map<String, KeyStatus> statusMap) {
    for (Map.Entry<String, KeyStatus> entry : statusMap.entrySet()) {
      if (entry.getValue() == KeyStatus.ALLOCATING) {
        lookupTableStore.delete(entry.getKey(), Columns.LOOKUP_LOCK_COLUMN);
      }
    }
  }

  /**
   * State of a lookup row. ALLOCATED is also the value persisted in the status column once a key is final;
   * ALLOCATING is only used in memory to mark rows this attempt holds a lock on.
   */
  private enum KeyStatus {
    ALLOCATING, ALLOCATED
  }
}