/* * Copyright 2013 Jive Software, Inc * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.jivesoftware.os.amza.service; import com.google.common.base.Preconditions; import com.jivesoftware.os.amza.api.FailedToAchieveQuorumException; import com.jivesoftware.os.amza.api.partition.Consistency; import com.jivesoftware.os.amza.api.partition.PartitionName; import com.jivesoftware.os.amza.api.partition.VersionedPartitionName; import com.jivesoftware.os.amza.api.ring.RingMember; import com.jivesoftware.os.amza.api.scan.RowsChanged; import com.jivesoftware.os.amza.api.stream.ClientUpdates; import com.jivesoftware.os.amza.api.stream.KeyValueStream; import com.jivesoftware.os.amza.api.stream.PrefixedKeyRanges; import com.jivesoftware.os.amza.api.stream.TxKeyValueStream; import com.jivesoftware.os.amza.api.stream.TxKeyValueStream.TxResult; import com.jivesoftware.os.amza.api.stream.UnprefixedWALKeys; import com.jivesoftware.os.amza.api.take.Highwaters; import com.jivesoftware.os.amza.api.take.TakeResult; import com.jivesoftware.os.amza.api.wal.WALHighwater; import com.jivesoftware.os.amza.api.wal.WALUpdated; import com.jivesoftware.os.amza.service.ring.AmzaRingReader; import com.jivesoftware.os.amza.service.storage.SystemWALStorage; import com.jivesoftware.os.amza.service.take.HighwaterStorage; import com.jivesoftware.os.amza.service.take.TakeCoordinator; import com.jivesoftware.os.aquarium.LivelyEndState; import com.jivesoftware.os.jive.utils.ordered.id.OrderIdProvider; import com.jivesoftware.os.mlogger.core.MetricLogger; import com.jivesoftware.os.mlogger.core.MetricLoggerFactory; import java.util.Set; public class SystemPartition implements Partition { private static final MetricLogger LOG = MetricLoggerFactory.getLogger(); private final OrderIdProvider orderIdProvider; private final WALUpdated walUpdated; private final RingMember ringMember; private final VersionedPartitionName versionedPartitionName; private final SystemWALStorage systemWALStorage; private final HighwaterStorage systemHighwaterStorage; private final AckWaters ackWaters; private final AmzaRingStoreReader ringReader; private final TakeCoordinator takeCoordinator; public SystemPartition(OrderIdProvider orderIdProvider, WALUpdated walUpdated, RingMember ringMember, PartitionName partitionName, SystemWALStorage systemWALStorage, HighwaterStorage systemHighwaterStorage, AckWaters ackWaters, AmzaRingStoreReader ringReader, TakeCoordinator takeCoordinator) { this.orderIdProvider = orderIdProvider; this.walUpdated = walUpdated; this.ringMember = ringMember; this.versionedPartitionName = new VersionedPartitionName(partitionName, VersionedPartitionName.STATIC_VERSION); this.systemWALStorage = systemWALStorage; this.systemHighwaterStorage = systemHighwaterStorage; this.ackWaters = ackWaters; this.ringReader = ringReader; this.takeCoordinator = takeCoordinator; } public PartitionName getPartitionName() { return versionedPartitionName.getPartitionName(); } @Override public void commit(Consistency consistency, byte[] prefix, ClientUpdates updates, long timeoutInMillis) throws Exception { Set<RingMember> neighbors = ringReader.getNeighboringRingMembers(AmzaRingReader.SYSTEM_RING, -1); int takeQuorum = consistency.quorum(neighbors.size()); if (takeQuorum > 0 && neighbors.size() < takeQuorum) { throw new FailedToAchieveQuorumException("There are an insufficent number of nodes to achieve desired take quorum:" + takeQuorum); } long timestampAndVersion = orderIdProvider.nextId(); long start = System.currentTimeMillis(); RowsChanged commit = systemWALStorage.update(versionedPartitionName, prefix, (highwaters, scan) -> updates.updates((key, value, valueTimestamp, valueTombstone) -> { long timestamp = valueTimestamp > 0 ? valueTimestamp : timestampAndVersion; return scan.row(-1L, key, value, timestamp, valueTombstone, timestampAndVersion); }), walUpdated); if (takeQuorum > 0) { LOG.debug("Awaiting quorum for {} ms", timeoutInMillis); int takenBy = ackWaters.await(versionedPartitionName, commit.getLargestCommittedTxId(), neighbors, takeQuorum, timeoutInMillis, -1, takeCoordinator); if (takenBy < takeQuorum) { throw new FailedToAchieveQuorumException("Timed out attempting to achieve desired take quorum:" + takeQuorum + " got:" + takenBy); } } } @Override public boolean get(Consistency consistency, byte[] prefix, boolean requiresOnline, UnprefixedWALKeys keys, KeyValueStream stream) throws Exception { boolean got = systemWALStorage.get(versionedPartitionName, prefix, keys, stream); return got; } @Override public boolean scan(PrefixedKeyRanges ranges, boolean hydrateValues, boolean requiresOnline, KeyValueStream stream) throws Exception { return ranges.consume((fromPrefix, fromKey, toPrefix, toKey) -> { if (fromKey == null && toKey == null) { boolean result = systemWALStorage.rowScan(versionedPartitionName, stream, true); if (!result) { return false; } } else { boolean result = systemWALStorage.rangeScan(versionedPartitionName, fromPrefix, fromKey, toPrefix, toKey, stream, true); if (!result) { return false; } } return true; }); } @Override public TakeResult takeFromTransactionId(long txId, boolean requiresOnline, Highwaters highwaters, TxKeyValueStream stream) throws Exception { return takeFromTransactionIdInternal(false, null, txId, highwaters, stream); } @Override public TakeResult takePrefixFromTransactionId(byte[] prefix, long txId, boolean requiresOnline, Highwaters highwaters, TxKeyValueStream stream) throws Exception { Preconditions.checkNotNull(prefix, "Must specify a prefix"); return takeFromTransactionIdInternal(true, prefix, txId, highwaters, stream); } private TakeResult takeFromTransactionIdInternal(boolean usePrefix, byte[] takePrefix, long txId, Highwaters highwaters, TxKeyValueStream stream) throws Exception { long[] lastTxId = { -1 }; TxResult[] done = new TxResult[1]; WALHighwater partitionHighwater = systemHighwaterStorage.getPartitionHighwater(versionedPartitionName, true); TxKeyValueStream delegateStream = (rowTxId, prefix, key, value, valueTimestamp, valueTombstone, valueVersion) -> { if (done[0] != null && rowTxId > lastTxId[0]) { // streamed to end of txId return done[0]; } if (done[0] != null) { if (done[0].isAccepted()) { // ignore result; lastTxId is unchanged stream.stream(rowTxId, prefix, key, value, valueTimestamp, valueTombstone, valueVersion); } } else { TxResult result = stream.stream(rowTxId, prefix, key, value, valueTimestamp, valueTombstone, valueVersion); if (result.isAccepted()) { if (rowTxId > lastTxId[0]) { lastTxId[0] = rowTxId; } } if (!result.wantsMore()) { if (result.isAccepted()) { // stream to end of txId done[0] = result; } else { // reject entire txId return result; } } } return TxResult.MORE; }; boolean tookToEnd; if (usePrefix) { tookToEnd = systemWALStorage.takeFromTransactionId(versionedPartitionName, takePrefix, txId, highwaters, delegateStream); } else { tookToEnd = systemWALStorage.takeFromTransactionId(versionedPartitionName, txId, highwaters, delegateStream); } return new TakeResult(ringMember, lastTxId[0], tookToEnd ? partitionHighwater : null); } @Override public long count() throws Exception { return systemWALStorage.count(versionedPartitionName); } @Override public long approximateCount() throws Exception { return systemWALStorage.approximateCount(versionedPartitionName); } /*@Override public long highestTxId() throws Exception { return systemWALStorage.highestPartitionTxId(versionedPartitionName); }*/ @Override public LivelyEndState livelyEndState() throws Exception { return LivelyEndState.ALWAYS_ONLINE; } }