/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.index.seqno; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.SuppressForbidden; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.index.IndexSettings; import java.util.LinkedList; /** * This class generates sequences numbers and keeps track of the so-called "local checkpoint" which is the highest number for which all * previous sequence numbers have been processed (inclusive). */ public class LocalCheckpointTracker { /** * We keep a bit for each sequence number that is still pending. To optimize allocation, we do so in multiple arrays allocating them on * demand and cleaning up while completed. This setting controls the size of the arrays. */ public static Setting<Integer> SETTINGS_BIT_ARRAYS_SIZE = Setting.intSetting("index.seq_no.checkpoint.bit_arrays_size", 1024, 4, Setting.Property.IndexScope); /** * An ordered list of bit arrays representing pending sequence numbers. The list is "anchored" in {@link #firstProcessedSeqNo} which * marks the sequence number the fist bit in the first array corresponds to. */ final LinkedList<FixedBitSet> processedSeqNo = new LinkedList<>(); /** * The size of each bit set representing processed sequence numbers. */ private final int bitArraysSize; /** * The sequence number that the first bit in the first array corresponds to. */ long firstProcessedSeqNo; /** * The current local checkpoint, i.e., all sequence numbers no more than this number have been completed. */ volatile long checkpoint; /** * The next available sequence number. */ private volatile long nextSeqNo; /** * Initialize the local checkpoint service. The {@code maxSeqNo} should be set to the last sequence number assigned, or * {@link SequenceNumbersService#NO_OPS_PERFORMED} and {@code localCheckpoint} should be set to the last known local checkpoint, * or {@link SequenceNumbersService#NO_OPS_PERFORMED}. * * @param indexSettings the index settings * @param maxSeqNo the last sequence number assigned, or {@link SequenceNumbersService#NO_OPS_PERFORMED} * @param localCheckpoint the last known local checkpoint, or {@link SequenceNumbersService#NO_OPS_PERFORMED} */ public LocalCheckpointTracker(final IndexSettings indexSettings, final long maxSeqNo, final long localCheckpoint) { if (localCheckpoint < 0 && localCheckpoint != SequenceNumbersService.NO_OPS_PERFORMED) { throw new IllegalArgumentException( "local checkpoint must be non-negative or [" + SequenceNumbersService.NO_OPS_PERFORMED + "] " + "but was [" + localCheckpoint + "]"); } if (maxSeqNo < 0 && maxSeqNo != SequenceNumbersService.NO_OPS_PERFORMED) { throw new IllegalArgumentException( "max seq. no. must be non-negative or [" + SequenceNumbersService.NO_OPS_PERFORMED + "] but was [" + maxSeqNo + "]"); } bitArraysSize = SETTINGS_BIT_ARRAYS_SIZE.get(indexSettings.getSettings()); firstProcessedSeqNo = localCheckpoint == SequenceNumbersService.NO_OPS_PERFORMED ? 0 : localCheckpoint + 1; nextSeqNo = maxSeqNo == SequenceNumbersService.NO_OPS_PERFORMED ? 0 : maxSeqNo + 1; checkpoint = localCheckpoint; } /** * Issue the next sequence number. * * @return the next assigned sequence number */ synchronized long generateSeqNo() { return nextSeqNo++; } /** * Marks the processing of the provided sequence number as completed as updates the checkpoint if possible. * * @param seqNo the sequence number to mark as completed */ public synchronized void markSeqNoAsCompleted(final long seqNo) { // make sure we track highest seen sequence number if (seqNo >= nextSeqNo) { nextSeqNo = seqNo + 1; } if (seqNo <= checkpoint) { // this is possible during recovery where we might replay an operation that was also replicated return; } final FixedBitSet bitSet = getBitSetForSeqNo(seqNo); final int offset = seqNoToBitSetOffset(seqNo); bitSet.set(offset); if (seqNo == checkpoint + 1) { updateCheckpoint(); } } /** * The current checkpoint which can be advanced by {@link #markSeqNoAsCompleted(long)}. * * @return the current checkpoint */ public long getCheckpoint() { return checkpoint; } /** * The maximum sequence number issued so far. * * @return the maximum sequence number */ long getMaxSeqNo() { return nextSeqNo - 1; } /** * constructs a {@link SeqNoStats} object, using local state and the supplied global checkpoint * * @implNote this is needed to make sure the local checkpoint and max seq no are consistent */ synchronized SeqNoStats getStats(final long globalCheckpoint) { return new SeqNoStats(getMaxSeqNo(), getCheckpoint(), globalCheckpoint); } /** * Waits for all operations up to the provided sequence number to complete. * * @param seqNo the sequence number that the checkpoint must advance to before this method returns * @throws InterruptedException if the thread was interrupted while blocking on the condition */ @SuppressForbidden(reason = "Object#wait") synchronized void waitForOpsToComplete(final long seqNo) throws InterruptedException { while (checkpoint < seqNo) { // notified by updateCheckpoint this.wait(); } } /** * Moves the checkpoint to the last consecutively processed sequence number. This method assumes that the sequence number following the * current checkpoint is processed. */ @SuppressForbidden(reason = "Object#notifyAll") private void updateCheckpoint() { assert Thread.holdsLock(this); assert checkpoint < firstProcessedSeqNo + bitArraysSize - 1 : "checkpoint should be below the end of the first bit set (o.w. current bit set is completed and shouldn't be there)"; assert getBitSetForSeqNo(checkpoint + 1) == processedSeqNo.getFirst() : "checkpoint + 1 doesn't point to the first bit set (o.w. current bit set is completed and shouldn't be there)"; assert getBitSetForSeqNo(checkpoint + 1).get(seqNoToBitSetOffset(checkpoint + 1)) : "updateCheckpoint is called but the bit following the checkpoint is not set"; try { // keep it simple for now, get the checkpoint one by one; in the future we can optimize and read words FixedBitSet current = processedSeqNo.getFirst(); do { checkpoint++; // the checkpoint always falls in the first bit set or just before. If it falls // on the last bit of the current bit set, we can clean it. if (checkpoint == firstProcessedSeqNo + bitArraysSize - 1) { processedSeqNo.removeFirst(); firstProcessedSeqNo += bitArraysSize; assert checkpoint - firstProcessedSeqNo < bitArraysSize; current = processedSeqNo.peekFirst(); } } while (current != null && current.get(seqNoToBitSetOffset(checkpoint + 1))); } finally { // notifies waiters in waitForOpsToComplete this.notifyAll(); } } /** * Return the bit array for the provided sequence number, possibly allocating a new array if needed. * * @param seqNo the sequence number to obtain the bit array for * @return the bit array corresponding to the provided sequence number */ private FixedBitSet getBitSetForSeqNo(final long seqNo) { assert Thread.holdsLock(this); assert seqNo >= firstProcessedSeqNo : "seqNo: " + seqNo + " firstProcessedSeqNo: " + firstProcessedSeqNo; final long bitSetOffset = (seqNo - firstProcessedSeqNo) / bitArraysSize; if (bitSetOffset > Integer.MAX_VALUE) { throw new IndexOutOfBoundsException( "sequence number too high; got [" + seqNo + "], firstProcessedSeqNo [" + firstProcessedSeqNo + "]"); } while (bitSetOffset >= processedSeqNo.size()) { processedSeqNo.add(new FixedBitSet(bitArraysSize)); } return processedSeqNo.get((int) bitSetOffset); } /** * Obtain the position in the bit array corresponding to the provided sequence number. The bit array corresponding to the sequence * number can be obtained via {@link #getBitSetForSeqNo(long)}. * * @param seqNo the sequence number to obtain the position for * @return the position in the bit array corresponding to the provided sequence number */ private int seqNoToBitSetOffset(final long seqNo) { assert Thread.holdsLock(this); assert seqNo >= firstProcessedSeqNo; return ((int) (seqNo - firstProcessedSeqNo)) % bitArraysSize; } }