// Copyright 2017 JanusGraph Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package org.janusgraph.diskstorage.idmanagement;
import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.*;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.janusgraph.diskstorage.*;
import org.janusgraph.diskstorage.util.*;
import org.janusgraph.util.stats.NumberUtil;
import org.janusgraph.diskstorage.util.time.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import org.janusgraph.diskstorage.configuration.Configuration;
import org.janusgraph.diskstorage.keycolumnvalue.KeyColumnValueStore;
import org.janusgraph.diskstorage.keycolumnvalue.KeyRange;
import org.janusgraph.diskstorage.keycolumnvalue.KeySliceQuery;
import org.janusgraph.diskstorage.keycolumnvalue.StoreManager;
import org.janusgraph.diskstorage.keycolumnvalue.StoreTransaction;
import org.janusgraph.diskstorage.locking.TemporaryLockingException;
import org.janusgraph.diskstorage.util.StandardBaseTransactionConfig;
import org.janusgraph.graphdb.database.idassigner.IDPoolExhaustedException;
import org.janusgraph.graphdb.database.idhandling.VariableLong;
/**
* {@link org.janusgraph.diskstorage.IDAuthority} implementation
* assuming that the backing store supports consistent key operations.
* <p/>
* ID blocks are allocated by first applying for an id block, waiting for a
* specified period of time and then checking that the application was the first
* received for that particular id block. If so, the application is considered
* successful. If not, some other process won the application and a new
* application is tried.
* <p/>
* The partition id is used as the key and since key operations are considered
* consistent, this protocol guarantees unique id block assignments.
* <p/>
* @author Matthias Broecheler (me@matthiasb.com)
*/
public class ConsistentKeyIDAuthority extends AbstractIDAuthority implements BackendOperation.TransactionalProvider {
/** Logger shared by all instances of this class. */
private static final Logger log = LoggerFactory.getLogger(ConsistentKeyIDAuthority.class);
/*
* ID columns are 17 or more bytes long:
*
* -----------------------------------------------------------
* | 8 bytes counter | 8 bytes timestamp | var bytes rid/uid |
* -----------------------------------------------------------
*
* The argument for the following two slice bounds mirrors the
* argument for choosing bounds in ConsistentKeyLocker.
*
* Both bounds are used together by getCurrentID() as the column range of the
* query that reads the existing claims stored under a partition key.
*/
private static final StaticBuffer LOWER_SLICE = BufferUtil.zeroBuffer(1);
private static final StaticBuffer UPPER_SLICE = BufferUtil.oneBuffer(17);
/** Store manager used to query backend features and to open transactions. */
private final StoreManager manager;
/** Store that holds the id block claims; read via getSlice and written via mutate. */
private final KeyColumnValueStore idStore;
/**
* Builder for the configuration of every transaction opened by this authority.
* Its custom options are selected in the constructor based on the conflict avoidance mode.
*/
private final StandardBaseTransactionConfig.Builder storeTxConfigBuilder;
/**
* Provider of timestamps, timers and sleeping, read from the TIMESTAMP_PROVIDER option.
* (Original author's note: this belongs in JanusGraphConfig.)
*/
private final TimestampProvider times;
/** Maximum number of attempts made to delete a claim that did not win its id block. */
private final int rollbackAttempts = 5;
/** Pause between two failed attempts to delete a losing claim. */
private final Duration rollbackWaitTime = Duration.ofMillis(200L);
/**
* Number of bits reserved for the partition id, derived from CLUSTER_MAX_PARTITIONS
* and validated to lie in [0,16]. The misspelled name is used throughout the class.
*/
private final int partitionBitWdith;
/** Conflict avoidance mode read from IDAUTHORITY_CONFLICT_AVOIDANCE. */
private final ConflictAvoidanceMode conflictAvoidanceMode;
/** Number of bits reserved for the unique id (IDAUTHORITY_CAV_BITS), validated to lie in [0,16]. */
private final int uniqueIdBitWidth;
/** Exclusive upper bound of valid unique ids: 1 << uniqueIdBitWidth. */
private final int uniqueIDUpperBound;
/** Fixed unique id read from IDAUTHORITY_CAV_TAG, or -1 when unique ids are randomized. */
private final int uniqueId;
/** True only when the conflict avoidance mode is GLOBAL_AUTO; a random unique id is then drawn per attempt. */
private final boolean randomizeUniqueId;
/**
* Number of exhausted unique ids after which getIDBlock() gives up with an
* IDPoolExhaustedException. Read from IDAUTHORITY_CAV_RETRIES in GLOBAL_AUTO mode, 0 otherwise.
*/
protected final int randomUniqueIDLimit;
/** Extra wait (one tenth of idApplicationWaitMS) added before a written claim is verified. */
private final Duration waitGracePeriod;
/** Cached value of the store manager's supportsInterruption feature flag. */
private final boolean supportsInterruption;
/** Source of random unique ids when randomizeUniqueId is set. */
private final Random random = new Random();
/**
 * Creates an id authority that keeps its id block claims in {@code idStore}.
 * <p>
 * The constructor validates the configuration and derives the bit widths used to
 * build partition keys and ids:
 * <ul>
 * <li>the partition bit width comes from {@code CLUSTER_MAX_PARTITIONS} and must be in [0,16];</li>
 * <li>the unique id bit width comes from {@code IDAUTHORITY_CAV_BITS} and must be in [0,16];</li>
 * <li>in {@code GLOBAL_AUTO} mode no unique id tag may be configured, unique ids are
 * randomized and {@code IDAUTHORITY_CAV_RETRIES} bounds the number of exhausted ids;</li>
 * <li>in every other mode the retry count must not be configured and the unique id is
 * read from {@code IDAUTHORITY_CAV_TAG} ({@code LOCAL_MANUAL} requires it to be set
 * explicitly and uses the local key-consistent transaction options).</li>
 * </ul>
 *
 * @param idStore store in which id block claims are read and written
 * @param manager store manager; its features must report key consistency
 * @param config  configuration the options above are read from
 * @throws BackendException declared for backend failures during setup
 * @throws IllegalArgumentException if any of the configuration checks fails
 * @throws NullPointerException if no timestamp provider is configured
 */
public ConsistentKeyIDAuthority(KeyColumnValueStore idStore, StoreManager manager, Configuration config) throws BackendException {
    super(config);
    Preconditions.checkArgument(manager.getFeatures().isKeyConsistent());
    this.manager = manager;
    this.idStore = idStore;
    this.times = config.get(TIMESTAMP_PROVIDER);
    this.waitGracePeriod = idApplicationWaitMS.dividedBy(10);
    Preconditions.checkNotNull(times);

    supportsInterruption = manager.getFeatures().supportsInterruption();

    partitionBitWdith = NumberUtil.getPowerOf2(config.get(CLUSTER_MAX_PARTITIONS));
    Preconditions.checkArgument(partitionBitWdith>=0 && partitionBitWdith<=16);

    uniqueIdBitWidth = config.get(IDAUTHORITY_CAV_BITS);
    Preconditions.checkArgument(uniqueIdBitWidth<=16 && uniqueIdBitWidth>=0);
    uniqueIDUpperBound = 1<<uniqueIdBitWidth;

    storeTxConfigBuilder = new StandardBaseTransactionConfig.Builder().groupName(metricsPrefix).timestampProvider(times);

    conflictAvoidanceMode = config.get(IDAUTHORITY_CONFLICT_AVOIDANCE);

    if (conflictAvoidanceMode.equals(ConflictAvoidanceMode.GLOBAL_AUTO)) {
        Preconditions.checkArgument(!config.has(IDAUTHORITY_CAV_TAG),"Conflicting configuration: a unique id and randomization have been set");
        randomizeUniqueId = true;
        randomUniqueIDLimit = config.get(IDAUTHORITY_CAV_RETRIES);
        // Guava's Preconditions only substitutes %s placeholders; %d would be printed literally.
        Preconditions.checkArgument(randomUniqueIDLimit<uniqueIDUpperBound,"Cannot have more uid retries [%s] than available values [%s]",
                randomUniqueIDLimit,uniqueIDUpperBound);
        // No fixed unique id in this mode; one is drawn at random for every allocation attempt.
        uniqueId = -1;
        storeTxConfigBuilder.customOptions(manager.getFeatures().getKeyConsistentTxConfig());
    } else {
        randomizeUniqueId = false;
        Preconditions.checkArgument(!config.has(IDAUTHORITY_CAV_RETRIES),"Retry count is only meaningful when " + IDAUTHORITY_CONFLICT_AVOIDANCE + " is set to " + ConflictAvoidanceMode.GLOBAL_AUTO);
        randomUniqueIDLimit = 0;
        if (conflictAvoidanceMode.equals(ConflictAvoidanceMode.LOCAL_MANUAL)) {
            Preconditions.checkArgument(config.has(IDAUTHORITY_CAV_TAG),"Need to configure a unique id in order to use local consistency");
            storeTxConfigBuilder.customOptions(manager.getFeatures().getLocalKeyConsistentTxConfig());
        } else {
            storeTxConfigBuilder.customOptions(manager.getFeatures().getKeyConsistentTxConfig());
        }
        uniqueId = config.get(IDAUTHORITY_CAV_TAG);
        Preconditions.checkArgument(uniqueId>=0,"Invalid unique id: %s",uniqueId);
        Preconditions.checkArgument(uniqueId<uniqueIDUpperBound,"Unique id is too large for bit width [%s]: %s",uniqueIdBitWidth,uniqueId);
    }
    Preconditions.checkArgument(randomUniqueIDLimit>=0);
}
/**
 * Returns whatever the store manager reports from {@code getLocalKeyPartition()}.
 *
 * @return the key ranges reported by the store manager
 * @throws BackendException if the store manager fails to answer
 */
@Override
public List<KeyRange> getLocalIDPartition() throws BackendException {
    final List<KeyRange> localRanges = this.manager.getLocalKeyPartition();
    return localRanges;
}
/**
 * Closes the store that holds the id block claims.
 *
 * @throws BackendException if closing the store fails
 */
@Override
public void close() throws BackendException {
    this.idStore.close();
}
/**
 * Reports the interruption flag captured from the store manager's features at construction time.
 *
 * @return the cached {@code supportsInterruption} feature flag
 */
@Override
public boolean supportsInterruption() {
    return this.supportsInterruption;
}
/**
 * Opens a new store transaction configured by this authority's transaction config builder.
 *
 * @return a freshly started transaction
 * @throws BackendException if the store manager cannot start the transaction
 */
@Override
public StoreTransaction openTx() throws BackendException {
    return this.manager.beginTransaction(this.storeTxConfigBuilder.build());
}
/**
 * Determines the highest counter value among the claims currently stored under
 * {@code partitionKey}.
 * <p>
 * At most five columns between {@code LOWER_SLICE} and {@code UPPER_SLICE} are read;
 * if none carries a counter above {@code BASE_ID}, {@code BASE_ID} is returned.
 *
 * @param partitionKey key under which the claims are stored
 * @return the largest decoded counter value, never less than {@code BASE_ID}
 * @throws BackendException if the read fails or yields {@code null}
 */
private long getCurrentID(final StaticBuffer partitionKey) throws BackendException {
    final BackendOperation.Transactional<List<Entry>> readClaims = new BackendOperation.Transactional<List<Entry>>() {
        @Override
        public List<Entry> call(StoreTransaction txh) throws BackendException {
            return idStore.getSlice(new KeySliceQuery(partitionKey, LOWER_SLICE, UPPER_SLICE).setLimit(5), txh);
        }
    };
    final List<Entry> claims = BackendOperation.execute(readClaims, this, times);

    if (claims == null) {
        throw new TemporaryBackendException("Could not read from storage");
    }

    long highest = BASE_ID;
    for (final Entry claim : claims) {
        highest = Math.max(highest, getBlockValue(claim));
    }
    return highest;
}
/**
 * Picks the unique id to use for the next allocation attempt: a random value below
 * {@code uniqueIDUpperBound} when randomization is enabled, otherwise the configured id.
 *
 * @return a unique id in {@code [0, uniqueIDUpperBound)}
 */
private int getUniquePartitionID() {
    final int chosen = randomizeUniqueId ? random.nextInt(uniqueIDUpperBound) : uniqueId;
    assert chosen>=0 && chosen<uniqueIDUpperBound;
    return chosen;
}
/**
 * Builds the storage key for a (partition, namespace, unique id) triple.
 * <p>
 * The key consists of two ints: the first holds the partition id shifted into the
 * top {@code partitionBitWdith} bits plus the unique id, the second holds the
 * id namespace.
 *
 * @param partition   partition id, must fit into {@code partitionBitWdith} bits
 * @param idNamespace non-negative id namespace
 * @param uniqueId    unique id, must fit into {@code uniqueIdBitWidth} bits
 * @return the buffer produced by {@code BufferUtil.getIntBuffer} for the two ints
 */
private StaticBuffer getPartitionKey(int partition, int idNamespace, int uniqueId) {
    assert partition>=0 && partition<(1<<partitionBitWdith);
    assert idNamespace>=0;
    assert uniqueId>=0 && uniqueId<(1<<uniqueIdBitWidth);

    // With a zero bit width there is only one partition and it contributes nothing to the key.
    final int shiftedPartition = partitionBitWdith>0 ? (partition<<(Integer.SIZE-partitionBitWdith)) : 0;
    return BufferUtil.getIntBuffer(new int[]{shiftedPartition + uniqueId, idNamespace});
}
/**
 * Allocates a new block of ids for the given partition and id namespace.
 * <p>
 * Each attempt proceeds as follows:
 * <ol>
 * <li>pick a unique id and read the highest counter already claimed under the
 * resulting partition key;</li>
 * <li>write a claim for the next block of {@code blockSize} ids;</li>
 * <li>wait for {@code idApplicationWaitMS} plus the grace period;</li>
 * <li>read back all claims for that counter value; the attempt succeeds if this
 * instance's claim is the first one returned.</li>
 * </ol>
 * A claim that did not succeed is deleted again (best effort, up to
 * {@code rollbackAttempts} tries). Attempts are repeated until {@code timeout} has elapsed.
 * A {@code TemporaryBackendException} doubles the back-off (capped at 32 times
 * {@code idApplicationWaitMS}) before the next attempt.
 * <p>
 * An {@code IDPoolExhaustedException} is thrown when the next block would not fit
 * below the id upper bound and either unique ids are not randomized or
 * {@code randomUniqueIDLimit} unique ids have been found exhausted.
 *
 * @param partition   partition id in {@code [0, 2^partitionBitWdith)}
 * @param idNamespace non-negative id namespace
 * @param timeout     maximum time to keep retrying
 * @return the acquired id block
 * @throws BackendException on permanent storage failures, on interruption while
 *         sleeping, or (as {@code TemporaryLockingException}) when the timeout is reached
 * @throws IllegalArgumentException if an argument or the configured bit widths are invalid
 */
@Override
public synchronized IDBlock getIDBlock(final int partition, final int idNamespace, Duration timeout) throws BackendException {
    Preconditions.checkArgument(partition>=0 && partition<(1<<partitionBitWdith),"Invalid partition id [%s] for bit width [%s]",partition, partitionBitWdith);
    Preconditions.checkArgument(idNamespace>=0); //can be any non-negative value

    final Timer methodTime = times.getTimer().start();

    final long blockSize = getBlockSize(idNamespace);
    final long idUpperBound = getIdUpperBound(idNamespace);

    // Bits left for the counter once the unique id bits are taken out of the id.
    final int maxAvailableBits = (VariableLong.unsignedBitLength(idUpperBound)-1)-uniqueIdBitWidth;
    Preconditions.checkArgument(maxAvailableBits>0,"Unique id bit width [%s] is too wide for id-namespace [%s] id bound [%s]"
            ,uniqueIdBitWidth,idNamespace,idUpperBound);
    final long idBlockUpperBound = (1L<<maxAvailableBits);

    final List<Integer> exhaustedUniquePIDs = new ArrayList<Integer>(randomUniqueIDLimit);

    Duration backoffMS = idApplicationWaitMS;

    Preconditions.checkArgument(idBlockUpperBound>blockSize,
            "Block size [%s] is larger than upper bound [%s] for bit width [%s]",blockSize,idBlockUpperBound,uniqueIdBitWidth);

    while (methodTime.elapsed().compareTo(timeout) < 0) {
        final int uniquePID = getUniquePartitionID();
        final StaticBuffer partitionKey = getPartitionKey(partition,idNamespace,uniquePID);
        try {
            long nextStart = getCurrentID(partitionKey);
            if (idBlockUpperBound - blockSize <= nextStart) {
                log.info("ID overflow detected on partition({})-namespace({}) with uniqueid {}. Current id {}, block size {}, and upper bound {} for bit width {}.",
                        partition, idNamespace, uniquePID, nextStart, blockSize, idBlockUpperBound, uniqueIdBitWidth);
                if (randomizeUniqueId) {
                    exhaustedUniquePIDs.add(uniquePID);
                    if (exhaustedUniquePIDs.size() == randomUniqueIDLimit)
                        throw new IDPoolExhaustedException(String.format("Exhausted %d uniqueid(s) on partition(%d)-namespace(%d): %s",
                                exhaustedUniquePIDs.size(), partition, idNamespace, Joiner.on(",").join(exhaustedUniquePIDs)));
                    else
                        // Caught below: the loop retries immediately with another random unique id.
                        throw new UniqueIDExhaustedException(
                                String.format("Exhausted ID partition(%d)-namespace(%d) with uniqueid %d (uniqueid attempt %d/%d)",
                                        partition, idNamespace, uniquePID, exhaustedUniquePIDs.size(), randomUniqueIDLimit));
                }
                throw new IDPoolExhaustedException("Exhausted id block for partition("+partition+")-namespace("+idNamespace+") with upper bound: " + idBlockUpperBound);
            }

            // calculate the start (inclusive) and end (exclusive) of the allocation we're about to attempt
            assert idBlockUpperBound - blockSize > nextStart;
            long nextEnd = nextStart + blockSize;
            StaticBuffer target = null;

            // attempt to write our claim on the next id block
            boolean success = false;
            try {
                Timer writeTimer = times.getTimer().start();
                target = getBlockApplication(nextEnd, writeTimer.getStartTime());
                final StaticBuffer finalTarget = target; // copy for the inner class
                BackendOperation.execute(new BackendOperation.Transactional<Boolean>() {
                    @Override
                    public Boolean call(StoreTransaction txh) throws BackendException {
                        idStore.mutate(partitionKey, Arrays.asList(StaticArrayEntry.of(finalTarget)), KeyColumnValueStore.NO_DELETIONS, txh);
                        return true;
                    }
                },this,times);
                writeTimer.stop();

                Duration writeElapsed = writeTimer.elapsed();
                if (idApplicationWaitMS.compareTo(writeElapsed) < 0) {
                    // A write slower than the application wait time could be ordered after a
                    // competitor's verification read, so the claim is abandoned and retried.
                    throw new TemporaryBackendException("Wrote claim for id block [" + nextStart + ", " + nextEnd + ") in " + (writeElapsed) + " => too slow, threshold is: " + idApplicationWaitMS);
                } else {

                    assert 0 != target.length();
                    final StaticBuffer[] slice = getBlockSlice(nextEnd);

                    /* At this point we've written our claim on [nextStart, nextEnd),
                     * but we haven't yet guaranteed the absence of a contending claim on
                     * the same id block from another machine
                     */

                    sleepAndConvertInterrupts(idApplicationWaitMS.plus(waitGracePeriod));

                    // Read all id allocation claims on this partition, for the counter value we're claiming
                    List<Entry> blocks = BackendOperation.execute(new BackendOperation.Transactional<List<Entry>>() {
                        @Override
                        public List<Entry> call(StoreTransaction txh) throws BackendException {
                            return idStore.getSlice(new KeySliceQuery(partitionKey, slice[0], slice[1]), txh);
                        }
                    },this,times);
                    if (blocks == null) throw new TemporaryBackendException("Could not read from storage");
                    if (blocks.isEmpty())
                        throw new PermanentBackendException("It seems there is a race-condition in the block application. " +
                                "If you have multiple JanusGraph instances running on one physical machine, ensure that they have unique machine idAuthorities");

                    /* If our claim is the lexicographically first one, then our claim
                     * is the most senior one and we own this id block
                     */
                    if (target.equals(blocks.get(0).getColumnAs(StaticBuffer.STATIC_FACTORY))) {

                        ConsistentKeyIDBlock idblock = new ConsistentKeyIDBlock(nextStart,blockSize,uniqueIdBitWidth,uniquePID);

                        if (log.isDebugEnabled()) {
                            log.debug("Acquired ID block [{}] on partition({})-namespace({}) (my rid is {})",
                                    new Object[]{idblock, partition, idNamespace, new String(uid)});
                        }

                        success = true;
                        return idblock;
                    } else {
                        // Another claimant beat us to this id block -- try again.
                        log.debug("Failed to acquire ID block [{},{}) (another host claimed it first)", nextStart, nextEnd);
                    }
                }
            } finally {
                if (!success && null != target) {
                    //Delete claim to not pollute id space
                    for (int attempt = 0; attempt < rollbackAttempts; attempt++) {
                        try {
                            final StaticBuffer finalTarget = target; // copy for the inner class
                            BackendOperation.execute(new BackendOperation.Transactional<Boolean>() {
                                @Override
                                public Boolean call(StoreTransaction txh) throws BackendException {
                                    idStore.mutate(partitionKey, KeyColumnValueStore.NO_ADDITIONS, Arrays.asList(finalTarget), txh);
                                    return true;
                                }
                            }, new BackendOperation.TransactionalProvider() { //Use normal consistency level for these non-critical delete operations
                                @Override
                                public StoreTransaction openTx() throws BackendException {
                                    return manager.beginTransaction(storeTxConfigBuilder.build());
                                }
                                @Override
                                public void close() {}
                            },times);

                            break;
                        } catch (BackendException e) {
                            log.warn("Storage exception while deleting old block application - retrying in {}", rollbackWaitTime, e);
                            if (!rollbackWaitTime.isZero())
                                sleepAndConvertInterrupts(rollbackWaitTime);
                        }
                    }
                }
            }
        } catch (UniqueIDExhaustedException e) {
            // No need to increment the backoff wait time or to sleep
            log.warn(e.getMessage());
        } catch (TemporaryBackendException e) {
            backoffMS = Durations.min(backoffMS.multipliedBy(2), idApplicationWaitMS.multipliedBy(32));
            log.warn("Temporary storage exception while acquiring id block - retrying in {}: {}", backoffMS, e);
            sleepAndConvertInterrupts(backoffMS);
        }
    }

    // Duration.getNano() is only the nanosecond-of-second component, so the whole
    // Duration is formatted instead to report the actual timeout.
    throw new TemporaryLockingException(String.format("Reached timeout %s (%s elapsed) when attempting to allocate id block on partition(%d)-namespace(%d)",
            timeout, methodTime, partition, idNamespace));
}
/**
 * Builds the column range that covers every claim written for {@code blockValue}.
 * <p>
 * Both bounds are 16 bytes long and start with the negated block value; the second
 * long is {@code 0} in the lower bound and {@code -1} in the upper bound.
 *
 * @param blockValue counter value (exclusive end of the block) being claimed
 * @return a two-element array: lower bound at index 0, upper bound at index 1
 */
private final StaticBuffer[] getBlockSlice(long blockValue) {
    final StaticBuffer lowerBound = new WriteByteBuffer(16).putLong(-blockValue).putLong(0).getStaticBuffer();
    final StaticBuffer upperBound = new WriteByteBuffer(16).putLong(-blockValue).putLong(-1).getStaticBuffer();
    return new StaticBuffer[]{lowerBound, upperBound};
}
/**
 * Encodes a claim column for the given block value.
 * <p>
 * Layout: the negated block value (8 bytes), the timestamp as converted by the
 * timestamp provider (8 bytes), followed by this instance's {@code uidBytes}.
 *
 * @param blockValue counter value (exclusive end of the block) being claimed
 * @param timestamp  time at which the claim is made
 * @return the encoded claim column
 */
private final StaticBuffer getBlockApplication(long blockValue, Instant timestamp) {
    final int counterBytes = 8;
    final int timestampBytes = 8;
    final WriteByteBuffer claim = new WriteByteBuffer(counterBytes + timestampBytes + uidBytes.length);

    claim.putLong(-blockValue).putLong(times.getTime(timestamp));
    WriteBufferUtil.put(claim, uidBytes);
    return claim.getStaticBuffer();
}
/**
 * Decodes the counter value of a claim column by negating the long stored at offset 0.
 *
 * @param column claim column as read from the id store
 * @return the counter value the claim was written for
 */
private final long getBlockValue(Entry column) {
    final long storedCounter = column.getLong(0);
    return -storedCounter;
}
/**
 * Sleeps until the timestamp provider's clock has passed "now plus {@code d}".
 * An interruption while sleeping is reported as a {@code PermanentBackendException}
 * wrapping the {@code InterruptedException}.
 *
 * @param d how long to sleep
 * @throws BackendException if the sleep is interrupted
 */
private void sleepAndConvertInterrupts(Duration d) throws BackendException {
    try {
        times.sleepPast(times.getTime().plus(d));
    } catch (InterruptedException interrupted) {
        throw new PermanentBackendException(interrupted);
    }
}
/**
 * Signals that the id space of one unique id is exhausted while other unique ids
 * may still be tried. Thrown and caught inside {@code getIDBlock} only.
 */
private static class UniqueIDExhaustedException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the exhausted unique id
     */
    public UniqueIDExhaustedException(String message) {
        super(message);
    }

}
/**
 * Id block whose ids are formed by shifting a running counter left by the unique id
 * bit width and adding the unique id in the freed low bits.
 */
private static class ConsistentKeyIDBlock implements IDBlock {

    /** First counter value of the block (inclusive). */
    private final long startIDCount;
    /** Number of ids contained in the block. */
    private final long numIds;
    /** Number of low bits of every id that hold the unique id. */
    private final int uniqueIDBitWidth;
    /** Unique id placed in the low bits of every id. */
    private final int uniqueID;

    private ConsistentKeyIDBlock(long startIDCount, long numIDs, int uniqueIDBitWidth, int uniqueID) {
        this.startIDCount = startIDCount;
        this.numIds = numIDs;
        this.uniqueIDBitWidth = uniqueIDBitWidth;
        this.uniqueID = uniqueID;
    }

    /**
     * @return the number of ids in this block
     */
    @Override
    public long numIds() {
        return numIds;
    }

    /**
     * Returns the id at the given position of the block.
     *
     * @param index position in {@code [0, numIds())}
     * @return {@code ((start + index) << uniqueIDBitWidth) + uniqueID}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside the block
     */
    @Override
    public long getId(long index) {
        if (index < 0 || index >= numIds) {
            // Report the full long index; narrowing it to int would misreport large values.
            throw new ArrayIndexOutOfBoundsException("Index " + index + " out of bounds for id block of size " + numIds);
        }
        assert uniqueID < (1 << uniqueIDBitWidth);
        return ((startIDCount + index) << uniqueIDBitWidth) + uniqueID;
    }

    /**
     * @return the counter interval {@code [start,end)}, followed by
     *         {@code /uniqueID:bitWidth} when a unique id bit width is in use
     */
    @Override
    public String toString() {
        String interval = "[" + startIDCount + "," + (startIDCount + numIds) + ")";
        if (uniqueIDBitWidth > 0) {
            interval += "/" + uniqueID + ":" + uniqueIDBitWidth;
        }
        return interval;
    }
}
}