/*
* Copyright 2017 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package com.github.ambry.store;
import com.codahale.metrics.MetricRegistry;
import com.github.ambry.config.StoreConfig;
import com.github.ambry.config.VerifiableProperties;
import com.github.ambry.utils.ByteBufferInputStream;
import com.github.ambry.utils.MockTime;
import com.github.ambry.utils.Pair;
import com.github.ambry.utils.TestUtils;
import com.github.ambry.utils.Time;
import com.github.ambry.utils.Utils;
import com.github.ambry.utils.UtilsTest;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import static org.junit.Assert.*;
/**
* Represents a carefully built state that includes {@link Log} and {@link PersistentIndex} instances along with
* data structures that represent the source of truth for the data those instances must contain. Allows some
* manipulation of the state and provides some useful functionality.
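* <p/>
* A minimal usage sketch (illustrative; how {@code tempDir} is obtained is an assumption of this example):
* <pre>
* {@code
* File tempDir = java.nio.file.Files.createTempDirectory("curatedState").toFile();
* CuratedLogIndexState state = new CuratedLogIndexState(true, tempDir);
* try {
*   state.addPutEntries(1, CuratedLogIndexState.PUT_RECORD_SIZE, Utils.Infinite_Time);
*   MockId idToDelete = state.getIdToDeleteFromLogSegment(state.log.getFirstSegment());
*   if (idToDelete != null) {
*     state.addDeleteEntry(idToDelete);
*   }
* } finally {
*   state.destroy();
* }
* }
* </pre>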
*/
class CuratedLogIndexState {
private static final byte[] RECOVERY_INFO = new byte[100];
// setupTestState() is coupled to these numbers. Changing them *will* cause state setup or tests to fail.
private static final long LOG_CAPACITY = 20000;
private static final long SEGMENT_CAPACITY = 2000;
private static final long HARD_DELETE_START_OFFSET = 11;
private static final long HARD_DELETE_LAST_PART_SIZE = 13;
static final int MAX_IN_MEM_ELEMENTS = 5;
static final long DELAY_BETWEEN_LAST_MODIFIED_TIMES_MS = 10 * Time.MsPerSec;
static final StoreKeyFactory STORE_KEY_FACTORY;
// deliberately do not divide the capacities perfectly.
static final long PUT_RECORD_SIZE = 53;
static final long DELETE_RECORD_SIZE = 29;
static {
try {
STORE_KEY_FACTORY = Utils.getObj("com.github.ambry.store.MockIdFactory");
Arrays.fill(CuratedLogIndexState.RECOVERY_INFO, (byte) 0);
} catch (Exception e) {
throw new IllegalStateException(e);
}
}
// The reference index to compare against. Key is index segment start Offset, Value is the reference index segment.
// This reflects exactly how PersistentIndex is supposed to look.
final TreeMap<Offset, TreeMap<MockId, IndexValue>> referenceIndex = new TreeMap<>();
// A map of all the keys. The key is the MockId and the value is a pair of index segment start Offsets.
// first Offset represents the index segment start offset of the PUT entry.
// second Offset represents the index segment start offset of the DELETE entry (null if the key has not been deleted).
final Map<MockId, Pair<Offset, Offset>> indexSegmentStartOffsets = new HashMap<>();
// A map of all the keys. The key is the MockId and the value is a pair of IndexValues.
// The first IndexValue represents the value of the PUT entry.
// The second IndexValue represents the value of the DELETE entry (null if the key has not been deleted).
final Map<MockId, Pair<IndexValue, IndexValue>> allKeys = new HashMap<>();
// map of all index segments to their last modified times
final Map<Offset, Long> lastModifiedTimesInSecs = new HashMap<>();
// Set of all deleted keys
final Set<MockId> deletedKeys = new HashSet<>();
// Set of all expired keys
final Set<MockId> expiredKeys = new HashSet<>();
// Set of all keys that are not deleted/expired
final Set<MockId> liveKeys = new HashSet<>();
// The keys in offset order as they appear in the log.
final TreeMap<Offset, Pair<MockId, LogEntry>> logOrder = new TreeMap<>();
// the properties that will be used to generate a StoreConfig. Clear before use if required.
final Properties properties = new Properties();
// the time instance that will be used in the index
final MockTime time = new MockTime();
// the scheduler used in the index
final ScheduledExecutorService scheduler = Utils.newScheduler(1, false);
// The MessageStoreRecovery that is used with the index
MessageStoreRecovery recovery = new DummyMessageStoreRecovery();
// The MessageStoreHardDelete that is used with the index
MessageStoreHardDelete hardDelete = new MockMessageStoreHardDelete();
// The Log which has the data
Log log;
// The index of the log
PersistentIndex index;
// The session ID associated with the index
UUID sessionId = UUID.randomUUID();
// the incarnationId associated with the store
UUID incarnationId = UUID.randomUUID();
// The MetricRegistry that is used with the index
MetricRegistry metricRegistry = new MetricRegistry();
// Variables that represent the folder where the data resides
private final File tempDir;
private final String tempDirStr;
// used by getUniqueId() to make sure keys are never regenerated in a single test run.
private final Set<MockId> generatedKeys = new HashSet<>();
/**
* Creates state in order to make sure all cases are represented and log-index tests don't need to do any setup
* individually. To understand the created index, please read the source code, which is annotated with comments.
* <p/>
* Also verifies that the state in {@link #referenceIndex} matches the state in the real index (which implicitly
* verifies some functionality of {@link PersistentIndex} and behaviour of the {@link Journal} in the index).
* @param isLogSegmented {@code true} if segmented. {@code false} otherwise.
* @param tempDir the directory where the log and index files should be created.
* @throws InterruptedException
* @throws IOException
* @throws StoreException
*/
CuratedLogIndexState(boolean isLogSegmented, File tempDir) throws InterruptedException, IOException, StoreException {
this(isLogSegmented, tempDir, false, true);
}
/**
* Creates state in order to make sure all cases are represented and log-index tests don't need to do any setup
* individually. To understand the created index, please read the source code, which is annotated with comments.
* <p/>
* Also verifies that the state in {@link #referenceIndex} matches the state in the real index (which implicitly
* verifies some functionality of {@link PersistentIndex} and behaviour of the {@link Journal} in the index).
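* <p/>
* For example, a minimal sketch of starting with an empty log and index but with hard delete enabled:
* <pre>
* {@code
* CuratedLogIndexState state = new CuratedLogIndexState(true, tempDir, true, false);
* }
* </pre>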
* @param isLogSegmented {@code true} if segmented. {@code false} otherwise.
* @param tempDir the directory where the log and index files should be created.
* @param hardDeleteEnabled if {@code true}, hard delete is enabled.
* @param initState sets up a diverse set of entries if {@code true}. Leaves the log and index empty if {@code false}.
* @throws InterruptedException
* @throws IOException
* @throws StoreException
*/
CuratedLogIndexState(boolean isLogSegmented, File tempDir, boolean hardDeleteEnabled, boolean initState)
throws InterruptedException, IOException, StoreException {
this.tempDir = tempDir;
tempDirStr = tempDir.getAbsolutePath();
long segmentCapacity = isLogSegmented ? CuratedLogIndexState.SEGMENT_CAPACITY : CuratedLogIndexState.LOG_CAPACITY;
StoreMetrics metrics = new StoreMetrics(tempDirStr, metricRegistry);
log = new Log(tempDirStr, CuratedLogIndexState.LOG_CAPACITY, segmentCapacity, metrics);
metricRegistry = new MetricRegistry();
properties.put("store.index.max.number.of.inmem.elements",
Integer.toString(CuratedLogIndexState.MAX_IN_MEM_ELEMENTS));
properties.put("store.enable.hard.delete", Boolean.toString(hardDeleteEnabled));
// not used but set anyway since this is a package private variable.
properties.put("store.segment.size.in.bytes", Long.toString(segmentCapacity));
initIndex(metricRegistry);
if (initState) {
setupTestState(isLogSegmented, segmentCapacity);
}
}
/**
* Destroys state and cleans up as required.
* @throws InterruptedException
* @throws IOException
* @throws StoreException
*/
void destroy() throws InterruptedException, IOException, StoreException {
index.close();
log.close();
scheduler.shutdown();
assertTrue(scheduler.awaitTermination(1, TimeUnit.SECONDS));
}
/**
* Adds {@code count} number of put entries each of size {@code size} and that expire at {@code expiresAtMs} to the
* index (both real and reference).
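* <p/>
* For example, a sketch of adding two PUT entries of the standard size that never expire:
* <pre>
* {@code
* List<IndexEntry> entries = state.addPutEntries(2, CuratedLogIndexState.PUT_RECORD_SIZE, Utils.Infinite_Time);
* }
* </pre>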
* @param count the number of PUT entries to add.
* @param size the size of each PUT entry.
* @param expiresAtMs the time at which each of the PUT entries expires.
* @return the list of the added entries.
* @throws InterruptedException
* @throws IOException
* @throws StoreException
*/
List<IndexEntry> addPutEntries(int count, long size, long expiresAtMs)
throws InterruptedException, IOException, StoreException {
if (count <= 0) {
throw new IllegalArgumentException("Number of put entries to add cannot be <= 0");
}
List<IndexEntry> indexEntries = new ArrayList<>(count);
Offset expectedJournalLastOffset = null;
Offset endOffsetOfPrevMsg = index.getCurrentEndOffset();
for (int i = 0; i < count; i++) {
byte[] dataWritten = appendToLog(size);
FileSpan fileSpan = log.getFileSpanForMessage(endOffsetOfPrevMsg, size);
IndexValue value = new IndexValue(size, fileSpan.getStartOffset(), expiresAtMs);
MockId id = getUniqueId();
IndexEntry entry = new IndexEntry(id, value);
indexEntries.add(entry);
logOrder.put(fileSpan.getStartOffset(), new Pair<>(id, new LogEntry(dataWritten, value)));
Offset indexSegmentStartOffset = generateReferenceIndexSegmentStartOffset(fileSpan.getStartOffset());
indexSegmentStartOffsets.put(id, new Pair<Offset, Offset>(indexSegmentStartOffset, null));
allKeys.put(id, new Pair<IndexValue, IndexValue>(value, null));
if (!referenceIndex.containsKey(indexSegmentStartOffset)) {
// rollover will occur
advanceTime(DELAY_BETWEEN_LAST_MODIFIED_TIMES_MS);
referenceIndex.put(indexSegmentStartOffset, new TreeMap<MockId, IndexValue>());
}
referenceIndex.get(indexSegmentStartOffset).put(id, value);
if (expiresAtMs != Utils.Infinite_Time && expiresAtMs < time.milliseconds()) {
expiredKeys.add(id);
} else {
liveKeys.add(id);
}
index.addToIndex(Collections.singletonList(entry), fileSpan);
lastModifiedTimesInSecs.put(indexSegmentStartOffset, time.seconds());
expectedJournalLastOffset = fileSpan.getStartOffset();
endOffsetOfPrevMsg = fileSpan.getEndOffset();
}
assertEquals("End Offset of index not as expected", endOffsetOfPrevMsg, index.getCurrentEndOffset());
assertEquals("Journal's last offset not as expected", expectedJournalLastOffset, index.journal.getLastOffset());
return indexEntries;
}
/**
* Adds a delete entry in the index (real and reference) for {@code idToDelete}.
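* <p/>
* For example, a sketch of deleting a live, unexpired key from the most recent index segment:
* <pre>
* {@code
* MockId idToDelete = state.getIdToDeleteFromIndexSegment(state.referenceIndex.lastKey());
* if (idToDelete != null) {
*   FileSpan deleteSpan = state.addDeleteEntry(idToDelete);
* }
* }
* </pre>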
* @param idToDelete the id to be deleted.
* @return the {@link FileSpan} of the added entries.
* @throws InterruptedException
* @throws IOException
* @throws StoreException
*/
FileSpan addDeleteEntry(MockId idToDelete) throws InterruptedException, IOException, StoreException {
byte[] dataWritten = appendToLog(CuratedLogIndexState.DELETE_RECORD_SIZE);
Offset endOffsetOfPrevMsg = index.getCurrentEndOffset();
FileSpan fileSpan = log.getFileSpanForMessage(endOffsetOfPrevMsg, CuratedLogIndexState.DELETE_RECORD_SIZE);
boolean forcePut = false;
IndexValue newValue;
if (allKeys.containsKey(idToDelete)) {
IndexValue value = getExpectedValue(idToDelete, true);
newValue = new IndexValue(value.getSize(), value.getOffset(), value.getFlags(), value.getExpiresAtMs(),
Utils.Infinite_Time);
newValue.setNewOffset(fileSpan.getStartOffset());
newValue.setNewSize(CuratedLogIndexState.DELETE_RECORD_SIZE);
} else {
newValue =
new IndexValue(CuratedLogIndexState.DELETE_RECORD_SIZE, fileSpan.getStartOffset(), Utils.Infinite_Time);
newValue.clearOriginalMessageOffset();
indexSegmentStartOffsets.put(idToDelete, new Pair<Offset, Offset>(null, null));
allKeys.put(idToDelete, new Pair<IndexValue, IndexValue>(null, null));
forcePut = true;
}
newValue.setFlag(IndexValue.Flags.Delete_Index);
logOrder.put(fileSpan.getStartOffset(), new Pair<>(idToDelete, new LogEntry(dataWritten, newValue)));
Offset indexSegmentStartOffset = generateReferenceIndexSegmentStartOffset(fileSpan.getStartOffset());
Pair<Offset, Offset> keyLocations = indexSegmentStartOffsets.get(idToDelete);
indexSegmentStartOffsets.put(idToDelete, new Pair<>(keyLocations.getFirst(), indexSegmentStartOffset));
Pair<IndexValue, IndexValue> keyValues = allKeys.get(idToDelete);
allKeys.put(idToDelete, new Pair<>(keyValues.getFirst(), newValue));
if (!referenceIndex.containsKey(indexSegmentStartOffset)) {
// rollover will occur
advanceTime(DELAY_BETWEEN_LAST_MODIFIED_TIMES_MS);
referenceIndex.put(indexSegmentStartOffset, new TreeMap<MockId, IndexValue>());
}
referenceIndex.get(indexSegmentStartOffset).put(idToDelete, newValue);
endOffsetOfPrevMsg = fileSpan.getEndOffset();
if (forcePut) {
index.addToIndex(new IndexEntry(idToDelete, newValue), fileSpan);
} else {
index.markAsDeleted(idToDelete, fileSpan);
}
lastModifiedTimesInSecs.put(indexSegmentStartOffset, time.seconds());
assertEquals("End Offset of index not as expected", endOffsetOfPrevMsg, index.getCurrentEndOffset());
assertEquals("Journal's last offset not as expected", fileSpan.getStartOffset(), index.journal.getLastOffset());
if (!deletedKeys.contains(idToDelete)) {
markAsDeleted(idToDelete);
}
return fileSpan;
}
/**
* Advances time by {@code ms} and adjusts {@link #liveKeys} if any of the keys in it expire.
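* <p/>
* For example, a sketch of how a key whose PUT expires one second from now moves from {@link #liveKeys} to
* {@link #expiredKeys} once time is advanced past its expiry:
* <pre>
* {@code
* state.addPutEntries(1, CuratedLogIndexState.PUT_RECORD_SIZE, state.time.milliseconds() + Time.MsPerSec);
* state.advanceTime(2 * Time.MsPerSec); // the key is now in expiredKeys and no longer in liveKeys
* }
* </pre>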
* @param ms the amount in ms to advance.
* @throws InterruptedException
*/
void advanceTime(long ms) throws InterruptedException {
time.sleep(ms);
Iterator<MockId> liveKeysIterator = liveKeys.iterator();
while (liveKeysIterator.hasNext()) {
MockId id = liveKeysIterator.next();
IndexValue value = allKeys.get(id).getFirst();
if (value.getExpiresAtMs() != Utils.Infinite_Time && value.getExpiresAtMs() < time.milliseconds()) {
expiredKeys.add(id);
liveKeysIterator.remove();
}
}
}
/**
* Appends random data of size {@code size} to the {@link #log}.
* @param size the size of the data that needs to be appended.
* @return the data that was appended.
* @throws IOException
*/
byte[] appendToLog(long size) throws IOException {
byte[] bytes = TestUtils.getRandomBytes((int) size);
if (size > CuratedLogIndexState.HARD_DELETE_START_OFFSET) {
// ensure at least one byte is set to 1 for hard delete verification purposes
int randomByte = (int) (CuratedLogIndexState.HARD_DELETE_START_OFFSET + TestUtils.RANDOM.nextInt(
(int) (size - CuratedLogIndexState.HARD_DELETE_START_OFFSET
- CuratedLogIndexState.HARD_DELETE_LAST_PART_SIZE)));
bytes[randomByte] = 1;
}
ByteBuffer buffer = ByteBuffer.wrap(bytes);
ReadableByteChannel channel = Channels.newChannel(new ByteBufferInputStream(buffer));
log.appendFrom(channel, buffer.capacity());
return bytes;
}
/**
* @return a {@link MockId} that is unique and has not been generated before in this run.
*/
MockId getUniqueId() {
MockId id;
do {
id = new MockId(UtilsTest.getRandomString(10));
} while (generatedKeys.contains(id));
generatedKeys.add(id);
return id;
}
/**
* Gets the value that is expected to be obtained from the {@link PersistentIndex}.
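* <p/>
* For example, a sketch of verifying the real index against the expected value (this assumes
* {@link PersistentIndex} exposes a {@code findKey} lookup):
* <pre>
* {@code
* IndexValue expected = state.getExpectedValue(id, false);
* IndexValue actual = state.index.findKey(id); // findKey is assumed to return the latest value for the key
* assertEquals("Offset mismatch", expected.getOffset(), actual.getOffset());
* }
* </pre>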
* @param id the {@link MockId} whose value is required.
* @param wantPut {@code true} if the {@link IndexValue} of the PUT entry is required.
* @return the value that is expected to be obtained from the {@link PersistentIndex}.
*/
IndexValue getExpectedValue(MockId id, boolean wantPut) {
Pair<IndexValue, IndexValue> indexValues = allKeys.get(id);
return wantPut || indexValues.getSecond() == null ? indexValues.getFirst() : indexValues.getSecond();
}
/**
* Gets the data that is expected to be obtained from the {@link Log}.
* @param id the {@link MockId} whose data is required.
* @param wantPut {@code true} if the data of the PUT entry is required.
* @return the data that is expected to be obtained from the {@link Log}.
*/
byte[] getExpectedData(MockId id, boolean wantPut) {
Pair<IndexValue, IndexValue> indexValues = allKeys.get(id);
Offset offset = wantPut ? indexValues.getFirst().getOffset() : indexValues.getSecond().getOffset();
return logOrder.get(offset).getSecond().buffer;
}
/**
* Returns all ids that have records in the provided {@code segment}.
* @param segment the {@link LogSegment} to get the ids of all records for.
* @return all ids that have records in the provided {@code segment}.
*/
Set<MockId> getIdsInLogSegment(LogSegment segment) {
Set<MockId> idsInSegment = new HashSet<>();
Offset indexSegmentStartOffset = new Offset(segment.getName(), segment.getStartOffset());
while (indexSegmentStartOffset != null && indexSegmentStartOffset.getName().equals(segment.getName())) {
idsInSegment.addAll(referenceIndex.get(indexSegmentStartOffset).keySet());
indexSegmentStartOffset = referenceIndex.higherKey(indexSegmentStartOffset);
}
return idsInSegment;
}
/**
* Gets an ID to delete from the index segment with start offset {@code indexSegmentStartOffset}. The returned ID will
* have been removed from {@link #liveKeys} and added to {@link #deletedKeys}. A call to
* {@link #addDeleteEntry(MockId)} is expected to follow.
* @param indexSegmentStartOffset the start offset of the index segment from which an ID is required.
* @return an ID to delete from the index segment with start offset {@code indexSegmentStartOffset}. {@code null} if
* there is no such candidate.
*/
MockId getIdToDeleteFromIndexSegment(Offset indexSegmentStartOffset) {
MockId deleteCandidate = null;
TreeMap<MockId, IndexValue> indexSegment = referenceIndex.get(indexSegmentStartOffset);
for (Map.Entry<MockId, IndexValue> entry : indexSegment.entrySet()) {
MockId id = entry.getKey();
if (liveKeys.contains(id) && allKeys.get(id).getFirst().getExpiresAtMs() == Utils.Infinite_Time) {
deleteCandidate = id;
break;
}
}
if (deleteCandidate != null) {
markAsDeleted(deleteCandidate);
}
return deleteCandidate;
}
/**
* Gets an ID to delete from the given log segment. The returned ID will have been removed from {@link #liveKeys} and
* added to {@link #deletedKeys}.
* @param segment the {@link LogSegment} from which an ID is required.
* @return the ID to delete. {@code null} if there is no such candidate.
*/
MockId getIdToDeleteFromLogSegment(LogSegment segment) {
MockId deleteCandidate;
Offset indexSegmentStartOffset = new Offset(segment.getName(), segment.getStartOffset());
do {
deleteCandidate = getIdToDeleteFromIndexSegment(indexSegmentStartOffset);
indexSegmentStartOffset = referenceIndex.higherKey(indexSegmentStartOffset);
if (indexSegmentStartOffset == null || !indexSegmentStartOffset.getName().equals(segment.getName())) {
break;
}
} while (deleteCandidate == null);
return deleteCandidate;
}
/**
* Gets the expected size of the valid data at {@code deleteReferenceTimeMs} in {@code segment}.
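* <p/>
* For example, a sketch of getting the valid size of the first log segment with deletes and expirations
* considered up to the current time:
* <pre>
* {@code
* long validSize = state.getValidDataSizeForLogSegment(state.log.getFirstSegment(), state.time.milliseconds(),
*     state.time.milliseconds());
* }
* </pre>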
* @param segment the {@link LogSegment} whose valid size is required.
* @param deleteReferenceTimeMs the reference time in ms until which deletes are relevant.
* @param expiryReferenceTimeMs the reference time in ms until which expirations are relevant.
* @return the expected size of the valid data at {@code deleteReferenceTimeMs} in {@code segment}.
*/
long getValidDataSizeForLogSegment(LogSegment segment, long deleteReferenceTimeMs, long expiryReferenceTimeMs) {
List<IndexEntry> validEntries =
getValidIndexEntriesForLogSegment(segment, deleteReferenceTimeMs, expiryReferenceTimeMs);
long size = 0;
for (IndexEntry indexEntry : validEntries) {
size += indexEntry.getValue().getSize();
}
return size;
}
/**
* Gets all the valid index entries (taking into account different reference times) in the {@code segment}.
* @param segment the {@link LogSegment} from which valid index entries are required.
* @param deleteReferenceTimeMs the reference time in ms until which deletes are relevant.
* @param expiryReferenceTimeMs the reference time in ms until which expirations are relevant.
* @return all the valid index entries in the {@code segment}.
*/
List<IndexEntry> getValidIndexEntriesForLogSegment(LogSegment segment, long deleteReferenceTimeMs,
long expiryReferenceTimeMs) {
List<IndexEntry> validEntries = new ArrayList<>();
Offset indexSegmentStartOffset = new Offset(segment.getName(), segment.getStartOffset());
while (indexSegmentStartOffset != null && indexSegmentStartOffset.getName().equals(segment.getName())) {
validEntries.addAll(
getValidIndexEntriesForIndexSegment(indexSegmentStartOffset, deleteReferenceTimeMs, expiryReferenceTimeMs));
indexSegmentStartOffset = referenceIndex.higherKey(indexSegmentStartOffset);
}
return validEntries;
}
/**
* Verify that deleted entries are hard deleted
* @param deletedIds the set of {@link MockId}s that are deleted
* @throws IOException
*/
void verifyEntriesForHardDeletes(Set<MockId> deletedIds) throws IOException {
for (MockId id : deletedIds) {
IndexValue putValue = allKeys.get(id).getFirst();
if (putValue != null) {
Offset offset = putValue.getOffset();
LogSegment segment = log.getSegment(offset.getName());
long size = putValue.getSize() - CuratedLogIndexState.HARD_DELETE_START_OFFSET
- CuratedLogIndexState.HARD_DELETE_LAST_PART_SIZE;
ByteBuffer readBuf = ByteBuffer.allocate((int) size);
segment.readInto(readBuf, offset.getOffset() + CuratedLogIndexState.HARD_DELETE_START_OFFSET);
readBuf.flip();
while (readBuf.hasRemaining()) {
assertEquals("Hard delete has not zeroed out the data", (byte) 0, readBuf.get());
}
}
}
}
/**
* Returns {@code true} if the {@code id} is considered deleted at {@code referenceTimeMs}.
* @param id the {@link MockId} to check.
* @param referenceTimeMs the reference time in ms at which the check needs to be performed.
* @return {@code true} if the {@code id} is considered deleted at {@code referenceTimeMs}. {@code false} otherwise.
*/
boolean isDeletedAt(MockId id, long referenceTimeMs) {
Offset deleteIndexSegmentStartOffset = indexSegmentStartOffsets.get(id).getSecond();
return deleteIndexSegmentStartOffset != null
&& lastModifiedTimesInSecs.get(deleteIndexSegmentStartOffset) * Time.MsPerSec < referenceTimeMs;
}
/**
* Returns {@code true} if the {@code id} is considered expired at {@code referenceTimeMs}.
* @param id the {@link MockId} to check.
* @param referenceTimeMs the reference time in ms at which the check needs to be performed.
* @return {@code true} if the {@code id} is considered expired at {@code referenceTimeMs}. {@code false} otherwise.
*/
boolean isExpiredAt(MockId id, long referenceTimeMs) {
long expiresAtMs = allKeys.get(id).getFirst().getExpiresAtMs();
return expiresAtMs != Utils.Infinite_Time && expiresAtMs < referenceTimeMs;
}
/**
* Ensures that the {@link PersistentIndex} is sane and correct by checking that
* 1. It contains no duplicate entries.
* 2. The ordering of PUT and DELETE entries is correct.
* 3. There are no offsets in the log not accounted for in the index.
* @throws IOException
* @throws StoreException
*/
void verifyRealIndexSanity() throws IOException, StoreException {
Map<MockId, Boolean> keyToDeleteSeenMap = new HashMap<>();
IndexSegment prevIndexSegment = null;
for (IndexSegment indexSegment : index.getIndexSegments().values()) {
Offset indexSegmentStartOffset = indexSegment.getStartOffset();
if (prevIndexSegment == null) {
assertEquals("There are offsets in the log not accounted for in index", log.getFirstSegment().getStartOffset(),
indexSegmentStartOffset.getOffset());
} else if (prevIndexSegment.getLogSegmentName().equals(indexSegment.getLogSegmentName())) {
assertEquals("There are offsets in the log not accounted for in index", prevIndexSegment.getEndOffset(),
indexSegmentStartOffset);
} else {
LogSegment segment = log.getSegment(prevIndexSegment.getLogSegmentName());
assertEquals("There are offsets in the log not accounted for in index", segment.getEndOffset(),
prevIndexSegment.getEndOffset().getOffset());
segment = log.getNextSegment(segment);
assertEquals("There are offsets in the log not accounted for in index", segment.getStartOffset(),
indexSegmentStartOffset.getOffset());
}
NavigableSet<IndexEntry> indexEntries = new TreeSet<>(PersistentIndex.INDEX_ENTRIES_OFFSET_COMPARATOR);
List<MessageInfo> infos = new ArrayList<>();
indexSegment.getEntriesSince(null, new FindEntriesCondition(Long.MAX_VALUE), infos, new AtomicLong(0));
for (MessageInfo info : infos) {
MockId id = (MockId) info.getStoreKey();
IndexValue value = indexSegment.find(id);
indexEntries.add(new IndexEntry(id, value));
Boolean deleteSeen = keyToDeleteSeenMap.get(id);
if (info.isDeleted()) {
if (deleteSeen != null) {
assertFalse("Duplicated DELETE record for " + id, deleteSeen);
}
keyToDeleteSeenMap.put(id, true);
} else {
if (deleteSeen != null) {
if (deleteSeen) {
fail("PUT record after delete record for " + id);
} else {
fail("Duplicated PUT record for " + id);
}
}
keyToDeleteSeenMap.put(id, false);
}
}
long expectedOffset = indexSegmentStartOffset.getOffset();
for (IndexEntry entry : indexEntries) {
IndexValue value = entry.getValue();
while (expectedOffset < indexSegment.getEndOffset().getOffset() && expectedOffset != value.getOffset()
.getOffset()) {
// this might be because a PUT and DELETE entry are in the same segment.
// find the record that should have been there
// NOTE: This is NOT built to work after compaction (like the rest of this class). It will fail on a very
// NOTE: specific corner case - where the PUT and DELETE entry for a blob ended up in the same index
// NOTE: segment after compaction (the DELETE wasn't eligible to be "counted").
Offset offset = new Offset(indexSegment.getLogSegmentName(), expectedOffset);
IndexValue putValue = logOrder.get(offset).getSecond().indexValue;
expectedOffset += putValue.getSize();
}
assertEquals("There are offsets in the log not accounted for in index", expectedOffset,
value.getOffset().getOffset());
expectedOffset += value.getSize();
}
if (prevIndexSegment != null) {
assertTrue("Last modified time of an older index segment > newer index segment",
prevIndexSegment.getLastModifiedTimeMs() <= indexSegment.getLastModifiedTimeMs());
}
prevIndexSegment = indexSegment;
}
}
/**
* Creates the index instance with the provided {@code metricRegistry}.
* @param metricRegistry the {@link MetricRegistry} to use to record metrics.
* @throws StoreException
*/
void initIndex(MetricRegistry metricRegistry) throws StoreException {
StoreMetrics metrics = new StoreMetrics(tempDirStr, metricRegistry);
StoreConfig config = new StoreConfig(new VerifiableProperties(properties));
sessionId = UUID.randomUUID();
index = new PersistentIndex(tempDirStr, scheduler, log, config, CuratedLogIndexState.STORE_KEY_FACTORY, recovery,
hardDelete, metrics, time, sessionId, incarnationId);
}
/**
* Reloads the index. Uses the class variables as parameters. For example, if a particular implementation of
* {@link MessageStoreRecovery} is desired, it can be set to {@link #recovery} and this function called. The newly
* created index will use that implementation of {@link MessageStoreRecovery}.
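* <p/>
* A sketch of that pattern (illustrative; {@code CustomRecovery} is a hypothetical implementation of
* {@link MessageStoreRecovery}):
* <pre>
* {@code
* state.recovery = new CustomRecovery();
* state.reloadIndex(true, false); // the reloaded index uses CustomRecovery
* }
* </pre>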
* @param closeBeforeReload {@code true} if index should be closed before reload. {@code false} otherwise.
* @param deleteCleanShutdownFile {@code true} if the clean shutdown file should be deleted to mimic unclean shutdown.
* Ignored if {@code closeBeforeReload} is {@code false}.
* @throws StoreException
*/
void reloadIndex(boolean closeBeforeReload, boolean deleteCleanShutdownFile) throws StoreException {
if (closeBeforeReload) {
index.close();
if (deleteCleanShutdownFile) {
assertTrue("The clean shutdown file could not be deleted",
new File(tempDir, PersistentIndex.CLEAN_SHUTDOWN_FILENAME).delete());
}
}
metricRegistry = new MetricRegistry();
initIndex(metricRegistry);
}
/**
* Reloads the log and index by closing and recreating the class variables.
* @param initIndex creates the index instance if {@code true}; otherwise, sets {@link #index} to {@code null}, in
* which case it has to be initialized with a call to {@link #initIndex(MetricRegistry)}.
* @throws IOException
* @throws StoreException
*/
void reloadLog(boolean initIndex) throws IOException, StoreException {
long segmentCapacity = log.getSegmentCapacity();
index.close();
log.close();
metricRegistry = new MetricRegistry();
StoreMetrics metrics = new StoreMetrics(tempDirStr, metricRegistry);
log = new Log(tempDirStr, LOG_CAPACITY, segmentCapacity, metrics);
index = null;
if (initIndex) {
initIndex(metricRegistry);
}
}
/**
* Closes the index and clears all the index files.
* @throws StoreException
*/
void closeAndClearIndex() throws StoreException {
index.close();
// delete all index files
File[] indexSegmentFiles = tempDir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.endsWith(IndexSegment.INDEX_SEGMENT_FILE_NAME_SUFFIX) || name.endsWith(
IndexSegment.BLOOM_FILE_NAME_SUFFIX);
}
});
assertNotNull("Could not load index segment files", indexSegmentFiles);
for (File indexSegmentFile : indexSegmentFiles) {
assertTrue("Could not deleted index segment file", indexSegmentFile.delete());
}
logOrder.clear();
referenceIndex.clear();
indexSegmentStartOffsets.clear();
allKeys.clear();
liveKeys.clear();
expiredKeys.clear();
}
/**
* Sets up some state in order to make sure all cases are represented and the tests don't need to do any setup
* individually. To understand the created index, please read the source code, which is annotated with comments.
* <p/>
* Also tests critical functionality of {@link PersistentIndex} and behaviour of the {@link Journal} in the index.
* Also verifies that the state in {@link #referenceIndex} matches the state in the real index.
* @param isLogSegmented {@code true} if segmented. {@code false} otherwise.
* @param segmentCapacity the intended capacity of each segment
* @throws InterruptedException
* @throws IOException
* @throws StoreException
*/
private void setupTestState(boolean isLogSegmented, long segmentCapacity)
throws InterruptedException, IOException, StoreException {
Offset expectedStartOffset = new Offset(log.getFirstSegment().getName(), log.getFirstSegment().getStartOffset());
assertEquals("Start Offset of index not as expected", expectedStartOffset, index.getStartOffset());
assertEquals("End Offset of index not as expected", log.getEndOffset(), index.getCurrentEndOffset());
// advance time by a second in order to be able to add expired keys and to avoid keys that are expired from
// being picked for delete.
advanceTime(Time.MsPerSec);
assertEquals("Incorrect log segment count", 0, index.getLogSegmentCount());
long expectedUsedCapacity;
if (!isLogSegmented) {
// log is filled about ~50%.
addCuratedIndexEntriesToLogSegment(segmentCapacity / 2, 1);
expectedUsedCapacity = segmentCapacity / 2;
assertEquals("Used capacity reported not as expected", expectedUsedCapacity, index.getLogUsedCapacity());
} else {
// first log segment is filled to capacity.
addCuratedIndexEntriesToLogSegment(segmentCapacity, 1);
assertEquals("Used capacity reported not as expected", segmentCapacity, index.getLogUsedCapacity());
// second log segment is filled but has some space at the end (free space has to be less than the lesser of the
// standard delete and put record sizes so that the next write causes a roll over of log segments).
addCuratedIndexEntriesToLogSegment(segmentCapacity - (CuratedLogIndexState.DELETE_RECORD_SIZE - 1), 2);
assertEquals("Used capacity reported not as expected",
2 * segmentCapacity - (CuratedLogIndexState.DELETE_RECORD_SIZE - 1), index.getLogUsedCapacity());
// third log segment is partially filled and is left as the "active" segment
// First Index Segment
// 1 PUT entry
addPutEntries(1, CuratedLogIndexState.PUT_RECORD_SIZE, Utils.Infinite_Time);
// DELETE for a key in the first log segment
LogSegment segment = log.getFirstSegment();
MockId idToDelete = getIdToDeleteFromLogSegment(segment);
addDeleteEntry(idToDelete);
assertEquals("Incorrect log segment count", 3, index.getLogSegmentCount());
// DELETE for a key in the second segment
segment = log.getNextSegment(segment);
idToDelete = getIdToDeleteFromLogSegment(segment);
addDeleteEntry(idToDelete);
// 1 DELETE for the PUT in the same segment
idToDelete = getIdToDeleteFromIndexSegment(referenceIndex.lastKey());
addDeleteEntry(idToDelete);
// 1 PUT entry that spans the rest of the data in the segment (up to a third of the segment size)
long size = segmentCapacity / 3 - index.getCurrentEndOffset().getOffset();
addPutEntries(1, size, Utils.Infinite_Time);
expectedUsedCapacity = 2 * segmentCapacity + segmentCapacity / 3;
assertEquals("Used capacity reported not as expected", expectedUsedCapacity, index.getLogUsedCapacity());
// fourth and fifth log segment are free.
}
// make sure all indexes are written to disk and mapped as required (forcing IndexPersistor to run).
log.flush();
reloadIndex(true, false);
verifyState(isLogSegmented);
assertEquals("Start Offset of index not as expected", expectedStartOffset, index.getStartOffset());
assertEquals("End Offset of index not as expected", log.getEndOffset(), index.getCurrentEndOffset());
assertEquals("Used capacity reported not as expected", expectedUsedCapacity, index.getLogUsedCapacity());
}
/**
* Adds some curated entries into the index in order to ensure a good mix for testing. To understand the created
* index, please read the source code, which is annotated with comments.
* @param sizeToMakeIndexEntriesFor the size to make index entries for.
* @param expectedLogSegmentCount the number of log segments that are expected to exist after the addition of the
* first entry and at the end of the addition of all entries.
* @throws InterruptedException
* @throws IOException
* @throws StoreException
*/
private void addCuratedIndexEntriesToLogSegment(long sizeToMakeIndexEntriesFor, int expectedLogSegmentCount)
throws InterruptedException, IOException, StoreException {
// First Index Segment
// 1 PUT
Offset firstJournalEntryAddedNow =
addPutEntries(1, CuratedLogIndexState.PUT_RECORD_SIZE, Utils.Infinite_Time).get(0).getValue().getOffset();
assertEquals("Incorrect log segment count", expectedLogSegmentCount, index.getLogSegmentCount());
// 2 more PUT
addPutEntries(2, CuratedLogIndexState.PUT_RECORD_SIZE, Utils.Infinite_Time);
// 2 PUT EXPIRED
addPutEntries(2, CuratedLogIndexState.PUT_RECORD_SIZE, 0);
// 5 entries were added - firstJournalEntryAddedNow should still be a part of the journal
List<JournalEntry> entries = index.journal.getEntriesSince(firstJournalEntryAddedNow, true);
assertEquals("There should have been exactly 5 entries returned from the journal", 5, entries.size());
// Second Index Segment
// 4 PUT
addPutEntries(4, CuratedLogIndexState.PUT_RECORD_SIZE, Utils.Infinite_Time);
// 1 DELETE for a PUT in the same index segment
MockId idToDelete = getIdToDeleteFromIndexSegment(referenceIndex.lastKey());
addDeleteEntry(idToDelete);
// 5 more entries (for a total of 10) were added - firstJournalEntryAddedNow should still be a part of the journal
entries = index.journal.getEntriesSince(firstJournalEntryAddedNow, true);
assertEquals("There should have been exactly 10 entries returned from the journal", 10, entries.size());
// 1 DELETE for a PUT in the first index segment
Offset firstIndexSegmentStartOffset = referenceIndex.lowerKey(referenceIndex.lastKey());
idToDelete = getIdToDeleteFromIndexSegment(firstIndexSegmentStartOffset);
addDeleteEntry(idToDelete);
// 1 more entry (for a total of 11) was added - firstJournalEntryAddedNow should no longer be a part of the journal
assertNull("There should no entries returned from the journal",
index.journal.getEntriesSince(firstJournalEntryAddedNow, true));
// Third and Fourth Index Segment
for (int seg = 0; seg < 2; seg++) {
// 3 PUT
addPutEntries(3, CuratedLogIndexState.PUT_RECORD_SIZE, Utils.Infinite_Time);
// 1 PUT for an expired blob
addPutEntries(1, CuratedLogIndexState.PUT_RECORD_SIZE, 0);
// 1 DELETE for the expired PUT
MockId expiredId = logOrder.lastEntry().getValue().getFirst();
addDeleteEntry(expiredId);
deletedKeys.add(expiredId);
expiredKeys.remove(expiredId);
// 1 PUT
addPutEntries(1, CuratedLogIndexState.PUT_RECORD_SIZE, Utils.Infinite_Time);
}
Offset fourthIndexSegmentStartOffset = referenceIndex.lastKey();
Offset thirdIndexSegmentStartOffset = referenceIndex.lowerKey(fourthIndexSegmentStartOffset);
// Fifth Index Segment
// 1 PUT entry
addPutEntries(1, CuratedLogIndexState.PUT_RECORD_SIZE, Utils.Infinite_Time);
// 1 DELETE for a PUT in each of the third and fourth segments
idToDelete = getIdToDeleteFromIndexSegment(thirdIndexSegmentStartOffset);
addDeleteEntry(idToDelete);
idToDelete = getIdToDeleteFromIndexSegment(fourthIndexSegmentStartOffset);
addDeleteEntry(idToDelete);
// 1 DELETE for the PUT in the same segment
idToDelete = getIdToDeleteFromIndexSegment(referenceIndex.lastKey());
addDeleteEntry(idToDelete);
// 1 DELETE for a PUT entry that does not exist
addDeleteEntry(getUniqueId());
// 1 PUT entry that spans the rest of the data in the segment
long size = sizeToMakeIndexEntriesFor - index.getCurrentEndOffset().getOffset();
addPutEntries(1, size, Utils.Infinite_Time);
assertEquals("Incorrect log segment count", expectedLogSegmentCount, index.getLogSegmentCount());
}
/**
* Verifies that the state in {@link PersistentIndex} is the same as the one in {@link #referenceIndex}.
* @param isLogSegmented {@code true} if segmented. {@code false} otherwise.
* @throws IOException
* @throws StoreException
*/
private void verifyState(boolean isLogSegmented) throws IOException, StoreException {
verifyRealIndexSanity();
assertEquals("Incorrect log segment count", isLogSegmented ? 3 : 1, index.getLogSegmentCount());
NavigableMap<Offset, IndexSegment> realIndex = index.getIndexSegments();
assertEquals("Number of index segments does not match expected", referenceIndex.size(), realIndex.size());
Map.Entry<Offset, IndexSegment> realIndexEntry = realIndex.firstEntry();
for (Map.Entry<Offset, TreeMap<MockId, IndexValue>> referenceIndexEntry : referenceIndex.entrySet()) {
assertEquals("Offset of index segment does not match expected", referenceIndexEntry.getKey(),
realIndexEntry.getKey());
TreeMap<MockId, IndexValue> referenceIndexSegment = referenceIndexEntry.getValue();
IndexSegment realIndexSegment = realIndexEntry.getValue();
List<MessageInfo> messageInfos = new ArrayList<>();
assertTrue("There should have been entries returned from the index segment",
realIndexSegment.getEntriesSince(null, new FindEntriesCondition(Long.MAX_VALUE), messageInfos,
new AtomicLong(0)));
assertEquals("Size of index segment differs from expected", referenceIndexSegment.size(), messageInfos.size());
for (Map.Entry<MockId, IndexValue> referenceIndexSegmentEntry : referenceIndexSegment.entrySet()) {
IndexValue value = realIndexSegment.find(referenceIndexSegmentEntry.getKey());
IndexValue referenceValue = referenceIndexSegmentEntry.getValue();
assertEquals("Offset does not match", referenceValue.getOffset(), value.getOffset());
assertEquals("ExpiresAtMs does not match", referenceValue.getExpiresAtMs(), value.getExpiresAtMs());
assertEquals("Size does not match", referenceValue.getSize(), value.getSize());
assertEquals("Service ID does not match", referenceValue.getServiceId(), value.getServiceId());
assertEquals("Container ID does not match", referenceValue.getContainerId(), value.getContainerId());
assertEquals("Original message offset does not match", referenceValue.getOriginalMessageOffset(),
value.getOriginalMessageOffset());
assertEquals("Flags do not match", referenceValue.getFlags(), value.getFlags());
if (index.hardDeleter.enabled.get() && !deletedKeys.contains(referenceIndexSegmentEntry.getKey())) {
assertEquals("Operation time does not match", referenceValue.getOperationTimeInMs(),
value.getOperationTimeInMs());
assertEquals("Value from IndexSegment does not match expected", referenceValue.getBytes(), value.getBytes());
}
}
realIndexEntry = realIndex.higherEntry(realIndexEntry.getKey());
}
assertNull("There should no more index segments left", realIndexEntry);
// all the elements in the last segment should be in the journal
assertNotNull("There is no offset in the log that corresponds to the last index segment start offset",
logOrder.floorEntry(referenceIndex.lastKey()));
Map.Entry<Offset, Pair<MockId, LogEntry>> logEntry = logOrder.floorEntry(referenceIndex.lastKey());
List<JournalEntry> entries = index.journal.getEntriesSince(referenceIndex.lastKey(), true);
for (JournalEntry entry : entries) {
assertNotNull("There are no more entries in the reference log but there are entries in the journal", logEntry);
assertEquals("Offset in journal not as expected", logEntry.getKey(), entry.getOffset());
assertEquals("Key in journal not as expected", logEntry.getValue().getFirst(), entry.getKey());
logEntry = logOrder.higherEntry(logEntry.getKey());
}
assertNull("There should be no more entries in the reference log", logEntry);
}
/**
* Marks {@code id} as deleted.
* @param id the {@link MockId} to mark as deleted.
*/
private void markAsDeleted(MockId id) {
deletedKeys.add(id);
liveKeys.remove(id);
}
/**
* Given an offset, generates the start offset of the index segment that the record at that offset has to go to.
* <p/>
* Use only for the latest record - does not work for offsets that are below the current index end offset.
* @param recordOffset the offset of the record being added to the index.
* @return the index segment start offset of the index segment that the record belongs to.
*/
private Offset generateReferenceIndexSegmentStartOffset(Offset recordOffset) {
if (referenceIndex.size() == 0) {
return recordOffset;
}
Map.Entry<Offset, TreeMap<MockId, IndexValue>> lastEntry = referenceIndex.lastEntry();
Offset indexSegmentStartOffset = lastEntry.getKey();
if (!indexSegmentStartOffset.getName().equals(recordOffset.getName())
|| lastEntry.getValue().size() == CuratedLogIndexState.MAX_IN_MEM_ELEMENTS) {
indexSegmentStartOffset = recordOffset;
}
return indexSegmentStartOffset;
}
/**
* Gets all the valid index entries (taking into account different reference times) in the index segment with start
* offset {@code indexSegmentStartOffset}.
* @param indexSegmentStartOffset the start offset of the {@link IndexSegment} from which valid index entries are
* required.
* @param deleteReferenceTimeMs the reference time in ms until which deletes are relevant.
* @param expiryReferenceTimeMs the reference time in ms until which expirations are relevant.
* @return all the valid index entries valid in the index segment with start offset {@code indexSegmentStartOffset}.
*/
List<IndexEntry> getValidIndexEntriesForIndexSegment(Offset indexSegmentStartOffset, long deleteReferenceTimeMs,
long expiryReferenceTimeMs) {
List<IndexEntry> validEntries = new ArrayList<>();
for (Map.Entry<MockId, IndexValue> indexSegmentEntry : referenceIndex.get(indexSegmentStartOffset).entrySet()) {
MockId key = indexSegmentEntry.getKey();
IndexValue value = indexSegmentEntry.getValue();
if (value.isFlagSet(IndexValue.Flags.Delete_Index)) {
// delete record is always valid
validEntries.add(new IndexEntry(key, value));
if (value.getOriginalMessageOffset() != IndexValue.UNKNOWN_ORIGINAL_MESSAGE_OFFSET
&& value.getOriginalMessageOffset() != value.getOffset().getOffset()
&& value.getOriginalMessageOffset() >= indexSegmentStartOffset.getOffset() && !isDeletedAt(key,
deleteReferenceTimeMs) && !isExpiredAt(key, expiryReferenceTimeMs)) {
// delete is irrelevant but it's in the same index segment as the put and the put is still valid
validEntries.add(new IndexEntry(key, allKeys.get(key).getFirst()));
}
} else if (!isExpiredAt(key, expiryReferenceTimeMs)) {
// unexpired
if (!deletedKeys.contains(key)) {
// non expired, non deleted PUT
validEntries.add(new IndexEntry(key, value));
} else if (!isDeletedAt(key, deleteReferenceTimeMs)) {
// delete does not count
validEntries.add(new IndexEntry(key, value));
}
}
}
return validEntries;
}
/**
* Mock implementation of {@link MessageStoreHardDelete} that returns {@link MessageInfo} appropriately and
* zeroes out a well-defined section of any offered blobs.
*/
private class MockMessageStoreHardDelete implements MessageStoreHardDelete {
@Override
public Iterator<HardDeleteInfo> getHardDeleteMessages(MessageReadSet readSet, StoreKeyFactory factory,
List<byte[]> recoveryInfoList) {
/*
* Returns hard delete messages that zero out well-known parts of the offered blobs.
*/
class MockMessageStoreHardDeleteIterator implements Iterator<HardDeleteInfo> {
private final MessageReadSet readSet;
private int count = 0;
private MockMessageStoreHardDeleteIterator(MessageReadSet readSet) {
this.readSet = readSet;
}
@Override
public boolean hasNext() {
return count < readSet.count();
}
@Override
public HardDeleteInfo next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
count++;
long size = readSet.sizeInBytes(count - 1) - CuratedLogIndexState.HARD_DELETE_START_OFFSET
- CuratedLogIndexState.HARD_DELETE_LAST_PART_SIZE;
ByteBuffer buf = ByteBuffer.allocate((int) size);
Arrays.fill(buf.array(), (byte) 0);
ByteBufferInputStream stream = new ByteBufferInputStream(buf);
ReadableByteChannel channel = Channels.newChannel(stream);
return new HardDeleteInfo(channel, buf.capacity(), CuratedLogIndexState.HARD_DELETE_START_OFFSET,
CuratedLogIndexState.RECOVERY_INFO);
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}
return new MockMessageStoreHardDeleteIterator(readSet);
}
@Override
public MessageInfo getMessageInfo(Read read, long offset, StoreKeyFactory factory) {
String segmentName = ((LogSegment) read).getName();
Pair<MockId, LogEntry> idAndValue = logOrder.get(new Offset(segmentName, offset));
IndexValue value = idAndValue.getSecond().indexValue;
return new MessageInfo(idAndValue.getFirst(), value.getSize(), value.getExpiresAtMs());
}
}
/**
* Represents a reference to an entry in the {@link Log}.
*/
static class LogEntry {
final byte[] buffer;
final IndexValue indexValue;
/**
* @param buffer the data in the {@link Log}.
* @param indexValue the {@link IndexValue} that was generated.
*/
LogEntry(byte[] buffer, IndexValue indexValue) {
this.buffer = buffer;
this.indexValue = indexValue;
}
}
}