/*
* Copyright (C) 2012, 2016 higherfrequencytrading.com
* Copyright (C) 2016 Roman Leventov
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package net.openhft.chronicle.hash.impl;
import net.openhft.chronicle.algo.locks.*;
import net.openhft.chronicle.bytes.BytesStore;
import net.openhft.chronicle.bytes.MappedBytesStoreFactory;
import net.openhft.chronicle.bytes.NativeBytesStore;
import net.openhft.chronicle.core.Maths;
import net.openhft.chronicle.core.OS;
import net.openhft.chronicle.hash.*;
import net.openhft.chronicle.hash.impl.util.BuildVersion;
import net.openhft.chronicle.hash.impl.util.jna.PosixMsync;
import net.openhft.chronicle.hash.impl.util.jna.WindowsMsync;
import net.openhft.chronicle.hash.serialization.DataAccess;
import net.openhft.chronicle.hash.serialization.SizeMarshaller;
import net.openhft.chronicle.hash.serialization.SizedReader;
import net.openhft.chronicle.hash.serialization.impl.SerializationBuilder;
import net.openhft.chronicle.map.ChronicleHashCorruptionImpl;
import net.openhft.chronicle.map.ChronicleMapBuilder;
import net.openhft.chronicle.values.Values;
import net.openhft.chronicle.wire.Marshallable;
import net.openhft.chronicle.wire.WireIn;
import net.openhft.chronicle.wire.WireOut;
import org.jetbrains.annotations.NotNull;
import sun.misc.Cleaner;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.ref.WeakReference;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import static java.lang.Long.numberOfTrailingZeros;
import static java.lang.Math.max;
import static java.nio.channels.FileChannel.MapMode.READ_WRITE;
import static net.openhft.chronicle.algo.MemoryUnit.*;
import static net.openhft.chronicle.algo.bytes.Access.nativeAccess;
import static net.openhft.chronicle.core.OS.pageAlign;
import static net.openhft.chronicle.hash.impl.CompactOffHeapLinearHashTable.*;
import static net.openhft.chronicle.map.ChronicleHashCorruptionImpl.format;
import static net.openhft.chronicle.map.ChronicleHashCorruptionImpl.report;
public abstract class VanillaChronicleHash<K,
C extends HashEntry<K>, SC extends HashSegmentContext<K, ?>,
ECQ extends ExternalHashQueryContext<K>>
implements ChronicleHash<K, C, SC, ECQ>, Marshallable {
public static final long TIER_COUNTERS_AREA_SIZE = 64;
public static final long RESERVED_GLOBAL_MUTABLE_STATE_BYTES = 1024;
// --- Start of instance fields ---
/////////////////////////////////////////////////
private String dataFileVersion;
/////////////////////////////////////////////////
// If the hash was created in the first place, or read from disk
public transient boolean createdOrInMemory;
/////////////////////////////////////////////////
// Key Data model
public Class<K> keyClass;
public SizeMarshaller keySizeMarshaller;
public SizedReader<K> keyReader;
public DataAccess<K> keyDataAccess;
/////////////////////////////////////////////////
public boolean checksumEntries;
/////////////////////////////////////////////////
// Concurrency (number of segments), memory management and dependent fields
public int actualSegments;
public HashSplitting hashSplitting;
public long chunkSize;
public int maxChunksPerEntry;
public long actualChunksPerSegmentTier;
/////////////////////////////////////////////////
// Precomputed offsets and sizes for fast Context init
int segmentHeaderSize;
public int tierHashLookupValueBits;
public int tierHashLookupKeyBits;
public int tierHashLookupSlotSize;
public long tierHashLookupCapacity;
public long maxEntriesPerHashLookup;
long tierHashLookupInnerSize;
public long tierHashLookupOuterSize;
public long tierFreeListInnerSize;
public long tierFreeListOuterSize;
long tierEntrySpaceInnerSize;
public int tierEntrySpaceInnerOffset;
long tierEntrySpaceOuterSize;
public long tierSize;
long maxExtraTiers;
long tierBulkSizeInBytes;
long tierBulkInnerOffsetToTiers;
public long tiersInBulk;
protected int log2TiersInBulk;
/////////////////////////////////////////////////
// Resources
private transient File file;
private transient RandomAccessFile raf;
private transient ChronicleHashResources resources;
private transient Cleaner cleaner;
/////////////////////////////////////////////////
// Bytes Store (essentially, the base address) and serialization-dependent offsets
protected transient BytesStore bs;
public static class TierBulkData {
public final BytesStore bytesStore;
public final long offset;
public TierBulkData(BytesStore bytesStore, long offset) {
this.bytesStore = bytesStore;
this.offset = offset;
}
public TierBulkData(TierBulkData data, long offset) {
this.bytesStore = data.bytesStore;
this.offset = offset;
}
}
public transient List<TierBulkData> tierBulkOffsets;
public transient long headerSize;
public transient long segmentHeadersOffset;
transient long segmentsOffset;
/////////////////////////////////////////////////
// Miscellaneous fields
public transient CompactOffHeapLinearHashTable hashLookup;
private transient VanillaGlobalMutableState globalMutableState;
/**
* {@link ChronicleHashCloseOnExitHook} needs to use {@code VanillaChronicleHash}es as
* WeakHashMap keys, but with identity comparison, not Map's equals() and hashCode().
*/
public class Identity {
public VanillaChronicleHash hash() {
return VanillaChronicleHash.this;
}
}
public transient Identity identity;
// --- End of instance fields ---
public VanillaChronicleHash(ChronicleMapBuilder<K, ?> builder) {
// Version
dataFileVersion = BuildVersion.version();
createdOrInMemory = true;
@SuppressWarnings({"deprecation", "unchecked"})
ChronicleHashBuilderPrivateAPI<K, ?> privateAPI =
(ChronicleHashBuilderPrivateAPI<K, ?>) builder.privateAPI();
// Data model
SerializationBuilder<K> keyBuilder = privateAPI.keyBuilder();
keyClass = keyBuilder.tClass;
keySizeMarshaller = keyBuilder.sizeMarshaller();
keyReader = keyBuilder.reader();
keyDataAccess = keyBuilder.dataAccess();
actualSegments = privateAPI.actualSegments();
hashSplitting = HashSplitting.forSegments(actualSegments);
chunkSize = privateAPI.chunkSize();
maxChunksPerEntry = privateAPI.maxChunksPerEntry();
actualChunksPerSegmentTier = privateAPI.actualChunksPerSegmentTier();
// Precomputed offsets and sizes for fast Context init
segmentHeaderSize = privateAPI.segmentHeaderSize();
tierHashLookupValueBits = valueBits(actualChunksPerSegmentTier);
tierHashLookupKeyBits = keyBits(privateAPI.entriesPerSegment(), tierHashLookupValueBits);
tierHashLookupSlotSize =
entrySize(tierHashLookupKeyBits, tierHashLookupValueBits);
if (!privateAPI.aligned64BitMemoryOperationsAtomic() && tierHashLookupSlotSize > 4) {
throw new IllegalStateException("aligned64BitMemoryOperationsAtomic() == false, " +
"but hash lookup slot is " + tierHashLookupSlotSize);
}
tierHashLookupCapacity = privateAPI.tierHashLookupCapacity();
maxEntriesPerHashLookup = (long) (tierHashLookupCapacity * MAX_LOAD_FACTOR);
tierHashLookupInnerSize = tierHashLookupCapacity * tierHashLookupSlotSize;
tierHashLookupOuterSize = CACHE_LINES.align(tierHashLookupInnerSize, BYTES);
tierFreeListInnerSize = LONGS.align(
BYTES.alignAndConvert(actualChunksPerSegmentTier, BITS), BYTES);
tierFreeListOuterSize = CACHE_LINES.align(tierFreeListInnerSize, BYTES);
tierEntrySpaceInnerSize = chunkSize * actualChunksPerSegmentTier;
tierEntrySpaceInnerOffset = privateAPI.segmentEntrySpaceInnerOffset();
tierEntrySpaceOuterSize = CACHE_LINES.align(
tierEntrySpaceInnerOffset + tierEntrySpaceInnerSize, BYTES);
tierSize = tierSize();
maxExtraTiers = privateAPI.maxExtraTiers();
tiersInBulk = computeNumberOfTiersInBulk();
log2TiersInBulk = Maths.intLog2(tiersInBulk);
tierBulkInnerOffsetToTiers = computeTierBulkInnerOffsetToTiers(tiersInBulk);
tierBulkSizeInBytes = computeTierBulkBytesSize(tiersInBulk);
checksumEntries = privateAPI.checksumEntries();
}
@Override
public void readMarshallable(@NotNull WireIn wire) {
readMarshallableFields(wire);
initTransients();
}
protected void readMarshallableFields(@NotNull WireIn wireIn) {
dataFileVersion = wireIn.read(() -> "dataFileVersion").text();
// Previously this assignment was done in default field initializer, but with Wire
// serialization VanillaChronicleMap instance is created with unsafe.allocateInstance(),
// that doesn't guarantee (?) to initialize fields with default values (false for boolean)
createdOrInMemory = false;
keyClass = wireIn.read(() -> "keyClass").typeLiteral();
keySizeMarshaller = wireIn.read(() -> "keySizeMarshaller").typedMarshallable();
keyReader = wireIn.read(() -> "keyReader").typedMarshallable();
keyDataAccess = wireIn.read(() -> "keyDataAccess").typedMarshallable();
checksumEntries = wireIn.read(() -> "checksumEntries").bool();
actualSegments = wireIn.read(() -> "actualSegments").int32();
hashSplitting = wireIn.read(() -> "hashSplitting").typedMarshallable();
chunkSize = wireIn.read(() -> "chunkSize").int64();
maxChunksPerEntry = wireIn.read(() -> "maxChunksPerEntry").int32();
actualChunksPerSegmentTier = wireIn.read(() -> "actualChunksPerSegmentTier").int64();
segmentHeaderSize = wireIn.read(() -> "segmentHeaderSize").int32();
tierHashLookupValueBits = wireIn.read(() -> "tierHashLookupValueBits").int32();
tierHashLookupKeyBits = wireIn.read(() -> "tierHashLookupKeyBits").int32();
tierHashLookupSlotSize = wireIn.read(() -> "tierHashLookupSlotSize").int32();
tierHashLookupCapacity = wireIn.read(() -> "tierHashLookupCapacity").int64();
maxEntriesPerHashLookup = wireIn.read(() -> "maxEntriesPerHashLookup").int64();
tierHashLookupInnerSize = wireIn.read(() -> "tierHashLookupInnerSize").int64();
tierHashLookupOuterSize = wireIn.read(() -> "tierHashLookupOuterSize").int64();
tierFreeListInnerSize = wireIn.read(() -> "tierFreeListInnerSize").int64();
tierFreeListOuterSize = wireIn.read(() -> "tierFreeListOuterSize").int64();
tierEntrySpaceInnerSize = wireIn.read(() -> "tierEntrySpaceInnerSize").int64();
tierEntrySpaceInnerOffset = wireIn.read(() -> "tierEntrySpaceInnerOffset").int32();
tierEntrySpaceOuterSize = wireIn.read(() -> "tierEntrySpaceOuterSize").int64();
tierSize = wireIn.read(() -> "tierSize").int64();
maxExtraTiers = wireIn.read(() -> "maxExtraTiers").int64();
tierBulkSizeInBytes = wireIn.read(() -> "tierBulkSizeInBytes").int64();
tierBulkInnerOffsetToTiers = wireIn.read(() -> "tierBulkInnerOffsetToTiers").int64();
tiersInBulk = wireIn.read(() -> "tiersInBulk").int64();
log2TiersInBulk = wireIn.read(() -> "log2TiersInBulk").int32();
}
@Override
public void writeMarshallable(@NotNull WireOut wireOut) {
wireOut.write(() -> "dataFileVersion").text(dataFileVersion);
wireOut.write(() -> "keyClass").typeLiteral(keyClass);
wireOut.write(() -> "keySizeMarshaller").typedMarshallable(keySizeMarshaller);
wireOut.write(() -> "keyReader").typedMarshallable(keyReader);
wireOut.write(() -> "keyDataAccess").typedMarshallable(keyDataAccess);
wireOut.write(() -> "checksumEntries").bool(checksumEntries);
wireOut.write(() -> "actualSegments").int32(actualSegments);
wireOut.write(() -> "hashSplitting").typedMarshallable(hashSplitting);
wireOut.write(() -> "chunkSize").int64(chunkSize);
wireOut.write(() -> "maxChunksPerEntry").int32(maxChunksPerEntry);
wireOut.write(() -> "actualChunksPerSegmentTier").int64(actualChunksPerSegmentTier);
wireOut.write(() -> "segmentHeaderSize").int32(segmentHeaderSize);
wireOut.write(() -> "tierHashLookupValueBits").int32(tierHashLookupValueBits);
wireOut.write(() -> "tierHashLookupKeyBits").int32(tierHashLookupKeyBits);
wireOut.write(() -> "tierHashLookupSlotSize").int32(tierHashLookupSlotSize);
wireOut.write(() -> "tierHashLookupCapacity").int64(tierHashLookupCapacity);
wireOut.write(() -> "maxEntriesPerHashLookup").int64(maxEntriesPerHashLookup);
wireOut.write(() -> "tierHashLookupInnerSize").int64(tierHashLookupInnerSize);
wireOut.write(() -> "tierHashLookupOuterSize").int64(tierHashLookupOuterSize);
wireOut.write(() -> "tierFreeListInnerSize").int64(tierFreeListInnerSize);
wireOut.write(() -> "tierFreeListOuterSize").int64(tierFreeListOuterSize);
wireOut.write(() -> "tierEntrySpaceInnerSize").int64(tierEntrySpaceInnerSize);
wireOut.write(() -> "tierEntrySpaceInnerOffset").int32(tierEntrySpaceInnerOffset);
wireOut.write(() -> "tierEntrySpaceOuterSize").int64(tierEntrySpaceOuterSize);
wireOut.write(() -> "tierSize").int64(tierSize);
wireOut.write(() -> "maxExtraTiers").int64(maxExtraTiers);
wireOut.write(() -> "tierBulkSizeInBytes").int64(tierBulkSizeInBytes);
wireOut.write(() -> "tierBulkInnerOffsetToTiers").int64(tierBulkInnerOffsetToTiers);
wireOut.write(() -> "tiersInBulk").int64(tiersInBulk);
wireOut.write(() -> "log2TiersInBulk").int32(log2TiersInBulk);
}
protected VanillaGlobalMutableState createGlobalMutableState() {
return Values.newNativeReference(VanillaGlobalMutableState.class);
}
public VanillaGlobalMutableState globalMutableState() {
return globalMutableState;
}
private long tierSize() {
long segmentSize = tierHashLookupOuterSize + TIER_COUNTERS_AREA_SIZE +
tierFreeListOuterSize + tierEntrySpaceOuterSize;
if ((segmentSize & 63L) != 0)
throw new AssertionError();
return breakL1CacheAssociativityContention(segmentSize);
}
protected final long breakL1CacheAssociativityContention(long sizeInBytes) {
// Conventional alignment to break is 4096 (given Intel's 32KB 8-way L1 cache),
// for any case break 2 times smaller alignment
int alignmentToBreak = 2048;
int eachNthSegmentFallIntoTheSameSet =
max(1, alignmentToBreak >> numberOfTrailingZeros(sizeInBytes));
if (eachNthSegmentFallIntoTheSameSet < actualSegments)
sizeInBytes |= CACHE_LINES.toBytes(1L); // make segment size "odd" (in cache lines)
return sizeInBytes;
}
private long computeNumberOfTiersInBulk() {
// TODO review heuristics
int tiersInBulk = actualSegments / 8;
tiersInBulk = Maths.nextPower2(tiersInBulk, 1);
while (computeTierBulkBytesSize(tiersInBulk) < OS.pageSize()) {
tiersInBulk *= 2;
}
return tiersInBulk;
}
private long computeTierBulkBytesSize(long tiersInBulk) {
return computeTierBulkInnerOffsetToTiers(tiersInBulk) + tiersInBulk * tierSize;
}
protected long computeTierBulkInnerOffsetToTiers(long tiersInBulk) {
return 0L;
}
public void initTransients() {
initOwnTransients();
}
private void initOwnTransients() {
globalMutableState = createGlobalMutableState();
tierBulkOffsets = new ArrayList<>();
if (tierHashLookupSlotSize == 4) {
hashLookup = new IntCompactOffHeapLinearHashTable(this);
} else if (tierHashLookupSlotSize == 8) {
hashLookup = new LongCompactOffHeapLinearHashTable(this);
} else {
throw new AssertionError("hash lookup slot size could be 4 or 8, " +
tierHashLookupSlotSize + " observed");
}
identity = new Identity();
}
public final void initBeforeMapping(
File file, RandomAccessFile raf, long headerEnd, boolean recover) throws IOException {
this.file = file;
this.raf = raf;
this.headerSize = roundUpMapHeaderSize(headerEnd);
if (!createdOrInMemory) {
// This block is for reading segmentHeadersOffset before main mapping
// After the mapping globalMutableState value's bytes are reassigned
ByteBuffer globalMutableStateBuffer =
ByteBuffer.allocate((int) globalMutableState.maxSize());
FileChannel fileChannel = raf.getChannel();
while (globalMutableStateBuffer.remaining() > 0) {
if (fileChannel.read(globalMutableStateBuffer,
this.headerSize + GLOBAL_MUTABLE_STATE_VALUE_OFFSET +
globalMutableStateBuffer.position()) == -1) {
throw throwRecoveryOrReturnIOException(file, "truncated", recover);
}
}
globalMutableStateBuffer.flip();
//noinspection unchecked
globalMutableState.bytesStore(BytesStore.wrap(globalMutableStateBuffer), 0,
globalMutableState.maxSize());
}
}
public static IOException throwRecoveryOrReturnIOException(
File file, String message, boolean recover) {
message = "file=" + file + " " + message;
if (recover) {
throw new ChronicleHashRecoveryFailedException(message);
} else {
return new IOException(message);
}
}
private static long roundUpMapHeaderSize(long headerSize) {
return CACHE_LINES.align(headerSize, BYTES);
}
public final void createInMemoryStoreAndSegments(ChronicleHashResources resources)
throws IOException {
this.resources = resources;
BytesStore bytesStore = nativeBytesStoreWithFixedCapacity(sizeInBytesWithoutTiers());
createStoreAndSegments(bytesStore);
}
private void createStoreAndSegments(BytesStore bytesStore) throws IOException {
initBytesStoreAndHeadersViews(bytesStore);
initOffsetsAndBulks();
}
private void initOffsetsAndBulks() {
segmentHeadersOffset = segmentHeadersOffset();
long segmentHeadersSize = actualSegments * segmentHeaderSize;
segmentsOffset = segmentHeadersOffset + segmentHeadersSize;
if (createdOrInMemory) {
zeroOutNewlyMappedChronicleMapBytes();
// write the segment headers offset after zeroing out
globalMutableState.setSegmentHeadersOffset(segmentHeadersOffset);
globalMutableState.setDataStoreSize(sizeInBytesWithoutTiers());
} else {
initBulks();
}
}
private void initBulks() {
if (globalMutableState.getAllocatedExtraTierBulks() > 0) {
appendBulkData(0, globalMutableState.getAllocatedExtraTierBulks() - 1,
bs, sizeInBytesWithoutTiers());
}
}
private void initBytesStoreAndHeadersViews(BytesStore bytesStore) {
if (bytesStore.start() != 0) {
throw new AssertionError("bytes store " + bytesStore + " starts from " +
bytesStore.start() + ", 0 expected");
}
this.bs = bytesStore;
//noinspection unchecked
globalMutableState.bytesStore(bs, headerSize + GLOBAL_MUTABLE_STATE_VALUE_OFFSET,
globalMutableState.maxSize());
onHeaderCreated();
}
public void setResourcesName() {
resources.setChronicleHashIdentityString(toIdentityString());
}
public void registerCleaner() {
this.cleaner = Cleaner.create(this, resources);
}
public void addToOnExitHook() {
ChronicleHashCloseOnExitHook.add(this);
}
public final void createMappedStoreAndSegments(ChronicleHashResources resources)
throws IOException {
this.resources = resources;
createStoreAndSegments(map(dataStoreSize(), 0));
}
public final void basicRecover(
ChronicleHashResources resources, ChronicleHashCorruption.Listener corruptionListener,
ChronicleHashCorruptionImpl corruption)
throws IOException {
this.resources = resources;
long segmentHeadersOffset = globalMutableState().getSegmentHeadersOffset();
if (segmentHeadersOffset <= 0 || segmentHeadersOffset % 4096 != 0 ||
segmentHeadersOffset > GIGABYTES.toBytes(1)) {
segmentHeadersOffset = computeSegmentHeadersOffset();
}
long sizeInBytesWithoutTiers = computeSizeInBytesWithoutTiers(segmentHeadersOffset);
long dataStoreSize = globalMutableState().getDataStoreSize();
int allocatedExtraTierBulks = globalMutableState().getAllocatedExtraTierBulks();
if (dataStoreSize < sizeInBytesWithoutTiers ||
((dataStoreSize - sizeInBytesWithoutTiers) % tierBulkSizeInBytes != 0)) {
dataStoreSize = sizeInBytesWithoutTiers + allocatedExtraTierBulks * tierBulkSizeInBytes;
} else {
allocatedExtraTierBulks =
(int) ((dataStoreSize - sizeInBytesWithoutTiers) / tierBulkSizeInBytes);
}
initBytesStoreAndHeadersViews(map(dataStoreSize, 0));
resetGlobalMutableStateLock(corruptionListener, corruption);
recoverAllocatedExtraTierBulks(allocatedExtraTierBulks, corruptionListener, corruption);
recoverSegmentHeadersOffset(segmentHeadersOffset, corruptionListener, corruption);
recoverDataStoreSize(dataStoreSize, corruptionListener, corruption);
initOffsetsAndBulks();
}
private void resetGlobalMutableStateLock(
ChronicleHashCorruption.Listener corruptionListener,
ChronicleHashCorruptionImpl corruption) {
long lockAddr = globalMutableStateAddress() + GLOBAL_MUTABLE_STATE_LOCK_OFFSET;
LockingStrategy lockingStrategy = globalMutableStateLockingStrategy;
long lockState = lockingStrategy.getState(nativeAccess(), null, lockAddr);
if (lockState != lockingStrategy.resetState()) {
report(corruptionListener, corruption, -1, () ->
format("global mutable state lock of map at {} is not clear: {}",
file, lockingStrategy.toString(lockState))
);
lockingStrategy.reset(nativeAccess(), null, lockAddr);
}
}
private void recoverAllocatedExtraTierBulks(
int allocatedExtraTierBulks, ChronicleHashCorruption.Listener corruptionListener,
ChronicleHashCorruptionImpl corruption) {
if (globalMutableState.getAllocatedExtraTierBulks() != allocatedExtraTierBulks) {
report(corruptionListener, corruption, -1, () ->
format("allocated extra tier bulks counter corrupted, or the map file {} " +
"is truncated. stored: {}, should be: {}",
file, globalMutableState.getAllocatedExtraTierBulks(),
allocatedExtraTierBulks)
);
globalMutableState.setAllocatedExtraTierBulks(allocatedExtraTierBulks);
}
}
private void recoverSegmentHeadersOffset(
long segmentHeadersOffset, ChronicleHashCorruption.Listener corruptionListener,
ChronicleHashCorruptionImpl corruption) {
if (globalMutableState.getSegmentHeadersOffset() != segmentHeadersOffset) {
report(corruptionListener, corruption, -1, () ->
format("segment headers offset of map at {} corrupted. stored: {}, should be: {}",
file, globalMutableState.getSegmentHeadersOffset(), segmentHeadersOffset)
);
globalMutableState.setSegmentHeadersOffset(segmentHeadersOffset);
}
}
private void recoverDataStoreSize(
long dataStoreSize, ChronicleHashCorruption.Listener corruptionListener,
ChronicleHashCorruptionImpl corruption) {
if (globalMutableState.getDataStoreSize() != dataStoreSize) {
report(corruptionListener, corruption, -1, () ->
format("data store size of map at {} corrupted. stored: {}, should be: {}",
file, globalMutableState.getDataStoreSize(), dataStoreSize)
);
globalMutableState.setDataStoreSize(dataStoreSize);
}
}
private boolean persisted() {
return file != null;
}
/**
* newly-extended file contents are not guaranteed to be zero
*/
protected void zeroOutNewlyMappedChronicleMapBytes() {
zeroOutGlobalMutableState();
zeroOutSegmentHeaders();
zeroOutFirstSegmentTiers();
}
private void zeroOutGlobalMutableState() {
bs.zeroOut(headerSize, headerSize + globalMutableStateTotalUsedSize());
}
protected long globalMutableStateTotalUsedSize() {
return GLOBAL_MUTABLE_STATE_VALUE_OFFSET + globalMutableState().maxSize();
}
private void zeroOutSegmentHeaders() {
bs.zeroOut(segmentHeadersOffset, segmentsOffset);
}
private void zeroOutFirstSegmentTiers() {
for (int segmentIndex = 0; segmentIndex < segments(); segmentIndex++) {
long segmentOffset = segmentOffset(segmentIndex);
zeroOutNewlyMappedTier(bs, segmentOffset);
}
}
private void zeroOutNewlyMappedTier(BytesStore bytesStore, long tierOffset) {
// Zero out hash lookup, tier data and free list bit set. Leave entry space dirty.
bytesStore.zeroOut(tierOffset, tierOffset + tierSize - tierEntrySpaceOuterSize);
}
public void onHeaderCreated() {
}
/**
* @return the version of Chronicle Map that was used to create the current data file
*/
public String persistedDataVersion() {
return dataFileVersion;
}
private long segmentHeadersOffset() {
if (createdOrInMemory) {
return computeSegmentHeadersOffset();
} else {
return globalMutableState.getSegmentHeadersOffset();
}
}
private long computeSegmentHeadersOffset() {
long reserved = RESERVED_GLOBAL_MUTABLE_STATE_BYTES - globalMutableStateTotalUsedSize();
// Align segment headers on page boundary to minimize number of pages that
// segment headers span
return pageAlign(mapHeaderInnerSize() + reserved);
}
public long mapHeaderInnerSize() {
return headerSize + globalMutableStateTotalUsedSize();
}
@Override
public File file() {
return file;
}
public final long sizeInBytesWithoutTiers() {
return computeSizeInBytesWithoutTiers(segmentHeadersOffset());
}
private long computeSizeInBytesWithoutTiers(long segmentHeadersOffset) {
return segmentHeadersOffset + actualSegments * (segmentHeaderSize + tierSize);
}
public final long dataStoreSize() {
long sizeInBytesWithoutTiers = sizeInBytesWithoutTiers();
int allocatedExtraTierBulks = !createdOrInMemory ?
globalMutableState.getAllocatedExtraTierBulks() : 0;
return sizeInBytesWithoutTiers + allocatedExtraTierBulks * tierBulkSizeInBytes;
}
@Override
public final void close() {
if (resources.releaseManually()) {
cleanupOnClose();
}
}
protected void cleanupOnClose() {
// Releases nothing after resources.releaseManually(), only removes the cleaner
// from the internal linked list of all cleaners.
cleaner.clean();
ChronicleHashCloseOnExitHook.remove(this);
// Make GC life easier
keyReader = null;
keyDataAccess = null;
}
@Override
public boolean isOpen() {
return !resources.closed();
}
public final void checkKey(Object key) {
if (!keyClass.isInstance(key)) {
// key.getClass will cause NPE exactly as needed
throw new ClassCastException(toIdentityString() + ": Key must be a " +
keyClass.getName() + " but was a " + key.getClass());
}
}
public final long segmentHeaderAddress(int segmentIndex) {
return bsAddress() + segmentHeadersOffset + ((long) segmentIndex) * segmentHeaderSize;
}
public long bsAddress() {
return bs.address(0);
}
public final long segmentBaseAddr(int segmentIndex) {
return bsAddress() + segmentOffset(segmentIndex);
}
private long segmentOffset(long segmentIndex) {
return segmentsOffset + segmentIndex * tierSize;
}
public final int inChunks(long sizeInBytes) {
// TODO optimize for the case when chunkSize is power of 2, that is default (and often) now
if (sizeInBytes <= chunkSize)
return 1;
// int division is MUCH faster than long on Intel CPUs
sizeInBytes -= 1L;
if (sizeInBytes <= Integer.MAX_VALUE)
return (((int) sizeInBytes) / (int) chunkSize) + 1;
return (int) (sizeInBytes / chunkSize) + 1;
}
public final int size() {
long size = longSize();
return size > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) size;
}
@Override
public int segments() {
return actualSegments;
}
/**
* Global mutable state lock doesn't yet need read-write levels and waits;
* Used the same locking strategy as in segment locks
* (VanillaReadWriteUpdateWithWaitsLockingStrategy) in order to simplify Chronicle Map
* specification (having only one kind of locks to specify and implement).
*/
static final LockingStrategy globalMutableStateLockingStrategy =
VanillaReadWriteUpdateWithWaitsLockingStrategy.instance();
static final TryAcquireOperation<LockingStrategy> globalMutableStateLockTryAcquireOperation =
TryAcquireOperations.lock();
static final
AcquisitionStrategy<LockingStrategy, RuntimeException>
globalMutableStateLockAcquisitionStrategy =
AcquisitionStrategies.spinLoopOrFail(2, TimeUnit.SECONDS);
private static final long GLOBAL_MUTABLE_STATE_LOCK_OFFSET = 0L;
private static final long GLOBAL_MUTABLE_STATE_VALUE_OFFSET = 8L;
private long globalMutableStateAddress() {
return bsAddress() + headerSize;
}
public void globalMutableStateLock() {
globalMutableStateLockAcquisitionStrategy.acquire(
globalMutableStateLockTryAcquireOperation, globalMutableStateLockingStrategy,
nativeAccess(), null,
globalMutableStateAddress() + GLOBAL_MUTABLE_STATE_LOCK_OFFSET);
}
public void globalMutableStateUnlock() {
globalMutableStateLockingStrategy.unlock(nativeAccess(), null,
globalMutableStateAddress() + GLOBAL_MUTABLE_STATE_LOCK_OFFSET);
}
/** For tests */
public boolean hasExtraTierBulks() {
return globalMutableState.getAllocatedExtraTierBulks() > 0;
}
@Override
public long offHeapMemoryUsed() {
return resources.totalMemory();
}
public long allocateTier() {
globalMutableStateLock();
try {
long tiersInUse = globalMutableState.getExtraTiersInUse();
if (tiersInUse >= maxExtraTiers) {
throw new IllegalStateException(toIdentityString() + ": " +
"Attempt to allocate #" + (tiersInUse + 1) +
" extra segment tier, " + maxExtraTiers + " is maximum.\n" +
"Possible reasons include:\n" +
" - you have forgotten to configure (or configured wrong) " +
"builder.entries() number\n" +
" - same regarding other sizing Chronicle Hash configurations, most " +
"likely maxBloatFactor(), averageKeySize(), or averageValueSize()\n" +
" - keys, inserted into the ChronicleHash, are distributed suspiciously " +
"bad. This might be a DOS attack");
}
long firstFreeTierIndex = globalMutableState.getFirstFreeTierIndex();
if (firstFreeTierIndex < 0) {
throw new RuntimeException(toIdentityString() +
": unexpected firstFreeTierIndex value " + firstFreeTierIndex);
}
if (firstFreeTierIndex == 0) {
try {
allocateTierBulk();
} catch (IOException e) {
throw new RuntimeException(toIdentityString(), e);
}
firstFreeTierIndex = globalMutableState.getFirstFreeTierIndex();
if (firstFreeTierIndex <= 0) {
throw new RuntimeException(toIdentityString() +
": unexpected firstFreeTierIndex value " + firstFreeTierIndex);
}
}
globalMutableState.setExtraTiersInUse(tiersInUse + 1);
BytesStore allocatedTierBytes = tierBytesStore(firstFreeTierIndex);
long allocatedTierOffset = tierBytesOffset(firstFreeTierIndex);
long tierBaseAddr = allocatedTierBytes.address(0) + allocatedTierOffset;
long tierCountersAreaAddr = tierBaseAddr + tierHashLookupOuterSize;
long nextFreeTierIndex = TierCountersArea.nextTierIndex(tierCountersAreaAddr);
globalMutableState.setFirstFreeTierIndex(nextFreeTierIndex);
return firstFreeTierIndex;
} finally {
globalMutableStateUnlock();
}
}
private void allocateTierBulk() throws IOException {
int allocatedExtraTierBulks = globalMutableState.getAllocatedExtraTierBulks();
mapTierBulks(allocatedExtraTierBulks);
long firstTierIndex = extraTierIndexToTierIndex(allocatedExtraTierBulks * tiersInBulk);
BytesStore tierBytesStore = tierBytesStore(firstTierIndex);
long firstTierOffset = tierBytesOffset(firstTierIndex);
if (tierBulkInnerOffsetToTiers > 0) {
// These bytes are bit sets in Replicated version
tierBytesStore.zeroOut(firstTierOffset - tierBulkInnerOffsetToTiers, firstTierOffset);
}
long lastTierIndex = firstTierIndex + tiersInBulk - 1;
linkAndZeroOutFreeTiers(firstTierIndex, lastTierIndex);
// see HCOLL-397
if (persisted()) {
long address = tierBytesStore.address(firstTierOffset - tierBulkInnerOffsetToTiers);
long endAddress = tierBytesStore.address(tierBytesOffset(lastTierIndex)) + tierSize;
long length = endAddress - address;
msync(address, length);
}
// after we are sure the new bulk is initialized, update the global mutable state
globalMutableState.setAllocatedExtraTierBulks(allocatedExtraTierBulks + 1);
globalMutableState.setFirstFreeTierIndex(firstTierIndex);
globalMutableState.addDataStoreSize(tierBulkSizeInBytes);
}
public void msync() throws IOException {
if (persisted()) {
msync(bsAddress(), bs.capacity());
}
}
private void msync(long address, long length) throws IOException {
// address should be a multiple of page size
if (OS.pageAlign(address) != address) {
long oldAddress = address;
address = OS.pageAlign(address) - OS.pageSize();
length += oldAddress - address;
}
if (OS.isWindows()) {
WindowsMsync.msync(raf, address, length);
} else {
PosixMsync.msync(address, length);
}
}
public void linkAndZeroOutFreeTiers(long firstTierIndex, long lastTierIndex) {
for (long tierIndex = firstTierIndex; tierIndex <= lastTierIndex; tierIndex++) {
long tierOffset = tierBytesOffset(tierIndex);
BytesStore tierBytesStore = tierBytesStore(tierIndex);
zeroOutNewlyMappedTier(tierBytesStore, tierOffset);
if (tierIndex < lastTierIndex) {
long tierCountersAreaOffset = tierOffset + tierHashLookupOuterSize;
TierCountersArea.nextTierIndex(tierBytesStore.address(0) + tierCountersAreaOffset,
tierIndex + 1);
}
}
}
public long extraTierIndexToTierIndex(long extraTierIndex) {
return actualSegments + extraTierIndex + 1;
}
public long tierIndexToBaseAddr(long tierIndex) {
// tiers are 1-counted, to allow tierIndex = 0 to be un-initialized in off-heap memory,
// convert into 0-based form
long tierIndexMinusOne = tierIndex - 1;
if (tierIndexMinusOne < actualSegments)
return segmentBaseAddr((int) tierIndexMinusOne);
return extraTierIndexToBaseAddr(tierIndexMinusOne);
}
public BytesStore tierBytesStore(long tierIndex) {
long tierIndexMinusOne = tierIndex - 1;
if (tierIndexMinusOne < actualSegments)
return bs;
return tierBulkData(tierIndexMinusOne).bytesStore;
}
public long tierBytesOffset(long tierIndex) {
long tierIndexMinusOne = tierIndex - 1;
if (tierIndexMinusOne < actualSegments)
return segmentOffset(tierIndexMinusOne);
long extraTierIndex = tierIndexMinusOne - actualSegments;
int bulkIndex = (int) (extraTierIndex >> log2TiersInBulk);
if (bulkIndex >= tierBulkOffsets.size())
mapTierBulks(bulkIndex);
return tierBulkOffsets.get(bulkIndex).offset + tierBulkInnerOffsetToTiers +
(extraTierIndex & (tiersInBulk - 1)) * tierSize;
}
private TierBulkData tierBulkData(long tierIndexMinusOne) {
long extraTierIndex = tierIndexMinusOne - actualSegments;
int bulkIndex = (int) (extraTierIndex >> log2TiersInBulk);
if (bulkIndex >= tierBulkOffsets.size())
mapTierBulks(bulkIndex);
return tierBulkOffsets.get(bulkIndex);
}
private long extraTierIndexToBaseAddr(long tierIndexMinusOne) {
long extraTierIndex = tierIndexMinusOne - actualSegments;
int bulkIndex = (int) (extraTierIndex >> log2TiersInBulk);
if (bulkIndex >= tierBulkOffsets.size())
mapTierBulks(bulkIndex);
TierBulkData tierBulkData = tierBulkOffsets.get(bulkIndex);
long tierIndexOffsetWithinBulk = extraTierIndex & (tiersInBulk - 1);
return tierAddr(tierBulkData, tierIndexOffsetWithinBulk);
}
protected long tierAddr(TierBulkData tierBulkData, long tierIndexOffsetWithinBulk) {
return tierBulkData.bytesStore.address(0) + tierBulkData.offset +
tierBulkInnerOffsetToTiers + tierIndexOffsetWithinBulk * tierSize;
}
private void mapTierBulks(int upToBulkIndex) {
if (persisted()) {
try {
mapTierBulksMapped(upToBulkIndex);
} catch (IOException e) {
throw new RuntimeException(toIdentityString(), e);
}
} else {
// in-memory ChMap
allocateTierBulks(upToBulkIndex);
}
}
private void mapTierBulksMapped(int upToBulkIndex) throws IOException {
int firstBulkToMapIndex = tierBulkOffsets.size();
int bulksToMap = upToBulkIndex + 1 - firstBulkToMapIndex;
long mapSize = bulksToMap * tierBulkSizeInBytes;
long mappingOffsetInFile, firstBulkToMapOffsetWithinMapping;
long firstBulkToMapOffset = bulkOffset(firstBulkToMapIndex);
if (OS.mapAlign(firstBulkToMapOffset) == firstBulkToMapOffset) {
mappingOffsetInFile = firstBulkToMapOffset;
firstBulkToMapOffsetWithinMapping = 0;
} else {
// If the bulk was allocated on OS with 4K mapping granularity (linux) and we
// are mapping it in OS with 64K mapping granularity (windows), we might need to
// start the mapping earlier than the first tier to map actually starts
mappingOffsetInFile = OS.mapAlign(firstBulkToMapOffset) - OS.mapAlignment();
firstBulkToMapOffsetWithinMapping = firstBulkToMapOffset - mappingOffsetInFile;
// Now might need to have bigger mapSize
mapSize += firstBulkToMapOffsetWithinMapping;
}
// mapping by hand, because MappedFile/MappedBytesStore doesn't allow to create a BS
// which starts not from the beginning of the file, but has start() of 0
NativeBytesStore extraStore = map(mapSize, mappingOffsetInFile);
appendBulkData(firstBulkToMapIndex, upToBulkIndex, extraStore,
firstBulkToMapOffsetWithinMapping);
}
/**
* @see net.openhft.chronicle.bytes.MappedFile#acquireByteStore(long, MappedBytesStoreFactory)
*/
private NativeBytesStore map(long mapSize, long mappingOffsetInFile) throws IOException {
mapSize = pageAlign(mapSize);
long minFileSize = mappingOffsetInFile + mapSize;
FileChannel fileChannel = raf.getChannel();
if (fileChannel.size() < minFileSize) {
// In MappedFile#acquireByteStore(), this is wrapped with fileLock(), to avoid race
// condition between processes. This map() method is called either when a new tier is
// allocated (in this case concurrent access is mutually excluded by
// globalMutableStateLock), or on map creation, when race condition should be excluded
// by self-bootstrapping header spec
raf.setLength(minFileSize);
}
long address = OS.map(fileChannel, READ_WRITE, mappingOffsetInFile, mapSize);
resources.addMemoryResource(address, mapSize);
return new NativeBytesStore(address, mapSize, null, false);
}
private long bulkOffset(int bulkIndex) {
return sizeInBytesWithoutTiers() + bulkIndex * tierBulkSizeInBytes;
}
private void allocateTierBulks(int upToBulkIndex) {
int firstBulkToAllocateIndex = tierBulkOffsets.size();
int bulksToAllocate = upToBulkIndex + 1 - firstBulkToAllocateIndex;
long allocationSize = bulksToAllocate * tierBulkSizeInBytes;
BytesStore extraStore = nativeBytesStoreWithFixedCapacity(allocationSize);
appendBulkData(firstBulkToAllocateIndex, upToBulkIndex, extraStore, 0);
}
private BytesStore nativeBytesStoreWithFixedCapacity(long capacity) {
long address = OS.memory().allocate(capacity);
resources.addMemoryResource(address, capacity);
return new NativeBytesStore<>(address, capacity, null, false);
}
private void appendBulkData(int firstBulkToMapIndex, int upToBulkIndex, BytesStore extraStore,
long offsetWithinMapping) {
TierBulkData firstMappedBulkData = new TierBulkData(extraStore, offsetWithinMapping);
tierBulkOffsets.add(firstMappedBulkData);
for (int bulkIndex = firstBulkToMapIndex + 1; bulkIndex <= upToBulkIndex; bulkIndex++) {
tierBulkOffsets.add(new TierBulkData(firstMappedBulkData,
offsetWithinMapping += tierBulkSizeInBytes));
}
}
protected void addContext(ContextHolder contextHolder) {
resources.addContext(contextHolder);
}
public void addCloseable(Closeable closeable) {
resources.addCloseable(closeable);
}
/** For testing only */
public List<WeakReference<ContextHolder>> allContexts() {
return Collections.unmodifiableList(resources.contexts());
}
}