/*
 * Copyright (C) 2012, 2016 higherfrequencytrading.com
 * Copyright (C) 2016 Roman Leventov
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package net.openhft.chronicle.hash.impl.stage.iter;

import net.openhft.chronicle.bytes.BytesUtil;
import net.openhft.chronicle.hash.ChronicleHashCorruption;
import net.openhft.chronicle.hash.ChronicleHashRecoveryFailedException;
import net.openhft.chronicle.hash.Data;
import net.openhft.chronicle.hash.impl.CompactOffHeapLinearHashTable;
import net.openhft.chronicle.hash.impl.VanillaChronicleHash;
import net.openhft.chronicle.hash.impl.stage.entry.SegmentStages;
import net.openhft.chronicle.map.ChronicleHashCorruptionImpl;
import net.openhft.chronicle.map.ExternalMapQueryContext;
import net.openhft.chronicle.map.MapEntry;
import net.openhft.chronicle.map.VanillaChronicleMap;
import net.openhft.chronicle.map.impl.VanillaChronicleMapHolder;
import net.openhft.chronicle.map.impl.stage.entry.MapEntryStages;
import net.openhft.sg.StageRef;
import net.openhft.sg.Staged;

import static net.openhft.chronicle.map.ChronicleHashCorruptionImpl.*;

@Staged
public class TierRecovery {

    @StageRef VanillaChronicleMapHolder<?, ?, ?> mh;
    @StageRef SegmentStages s;
    @StageRef MapEntryStages<?, ?> e;
    @StageRef IterationKeyHashCode khc;

    public int recoverTier(
            int segmentIndex,
            ChronicleHashCorruption.Listener corruptionListener,
            ChronicleHashCorruptionImpl corruption) {
        s.freeList.clearAll();

        VanillaChronicleHash<?, ?, ?, ?> h = mh.h();
        CompactOffHeapLinearHashTable hl = h.hashLookup;
        long hlAddr = s.tierBaseAddr;

        long validEntries = 0;
        long hlPos = 0;
        do {
            long hlEntry = hl.readEntry(hlAddr, hlPos);
            nextHlPos:
            if (!hl.empty(hlEntry)) {
                // (*)
                hl.clearEntry(hlAddr, hlPos);
                if (validEntries >= h.maxEntriesPerHashLookup) {
                    report(corruptionListener, corruption, segmentIndex, () ->
                            format("Too many entries in tier with index {}, max is {}",
                                    s.tierIndex, h.maxEntriesPerHashLookup)
                    );
                    break nextHlPos;
                }

                long searchKey = hl.key(hlEntry);
                long entryPos = hl.value(hlEntry);
                int si = checkEntry(searchKey, entryPos, segmentIndex,
                        corruptionListener, corruption);
                if (si < 0) {
                    break nextHlPos;
                } else {
                    s.freeList.setRange(entryPos, entryPos + e.entrySizeInChunks);
                    segmentIndex = si;
                }

                // The entry has passed all checks, re-insert:
                long startInsertPos = hl.hlPos(searchKey);
                long insertPos = startInsertPos;
                do {
                    long hlInsertEntry = hl.readEntry(hlAddr, insertPos);
                    if (hl.empty(hlInsertEntry)) {
                        hl.writeEntry(hlAddr, insertPos, hl.entry(searchKey, entryPos));
                        validEntries++;
                        break nextHlPos;
                    }
                    if (insertPos == hlPos) {
                        // Means we have made a whole loop without finding a hole to
                        // re-insert the entry. Even if hashLookup was corrupted and all
                        // slots are dirty now, at least the slot cleared at (*) should be
                        // clear; if it is dirty, only a concurrently modifying thread
                        // could have occupied it.
                        throw new ChronicleHashRecoveryFailedException(
                                "Concurrent modification of " + h.toIdentityString() +
                                        " while recovery procedure is in progress");
                    }
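
                    // The search key is only a masked fragment of the full key hash
                    // (see maskUnsetKey() in checkEntry()), so two distinct entries can
                    // legitimately share it. When the probed slot holds the same search
                    // key, the actual key bytes are compared below before either entry
                    // is treated as a duplicate.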
                    checkDuplicateKeys:
                    if (hl.key(hlInsertEntry) == searchKey) {
                        long anotherEntryPos = hl.value(hlInsertEntry);
                        if (anotherEntryPos == entryPos) {
                            validEntries++;
                            break nextHlPos;
                        }
                        long currentKeyOffset = e.keyOffset;
                        long currentKeySize = e.keySize;
                        int currentEntrySizeInChunks = e.entrySizeInChunks;
                        if (insertPos >= 0 && insertPos < hlPos) {
                            // insertPos already checked
                            e.readExistingEntry(anotherEntryPos);
                        } else if (checkEntry(searchKey, anotherEntryPos, segmentIndex,
                                corruptionListener, corruption) < 0) {
                            break checkDuplicateKeys;
                        }
                        if (e.keySize == currentKeySize &&
                                BytesUtil.bytesEqual(s.segmentBS, currentKeyOffset,
                                        s.segmentBS, e.keyOffset, currentKeySize)) {
                            report(corruptionListener, corruption, segmentIndex, () ->
                                    format("Entries with duplicate keys within a tier: " +
                                                    "at pos {} and {} with key {}, " +
                                                    "first value is {}",
                                            entryPos, anotherEntryPos, e.key(), e.value())
                            );
                            s.freeList.clearRange(entryPos, entryPos + currentEntrySizeInChunks);
                            break nextHlPos;
                        }
                    }
                    insertPos = hl.step(insertPos);
                } while (insertPos != startInsertPos);
                throw new ChronicleHashRecoveryFailedException(
                        "HashLookup overflow should never occur. " +
                                "It might also be concurrent access to " + h.toIdentityString() +
                                " while recovery procedure is in progress");
            }
            hlPos = hl.step(hlPos);
        } while (hlPos != 0);
        shiftHashLookupEntries();
        return segmentIndex;
    }

    private void shiftHashLookupEntries() {
        VanillaChronicleHash<?, ?, ?, ?> h = mh.h();
        CompactOffHeapLinearHashTable hl = h.hashLookup;
        long hlAddr = s.tierBaseAddr;
        long hlPos = 0;
        long steps = 0;
        do {
            long hlEntry = hl.readEntry(hlAddr, hlPos);
            if (!hl.empty(hlEntry)) {
                long searchKey = hl.key(hlEntry);
                long hlHolePos = hl.hlPos(searchKey);
                while (hlHolePos != hlPos) {
                    long hlHoleEntry = hl.readEntry(hlAddr, hlHolePos);
                    if (hl.empty(hlHoleEntry)) {
                        hl.writeEntry(hlAddr, hlHolePos, hlEntry);
                        if (hl.remove(hlAddr, hlPos) != hlPos) {
                            hlPos = hl.stepBack(hlPos);
                            steps--;
                        }
                        break;
                    }
                    hlHolePos = hl.step(hlHolePos);
                }
            }
            hlPos = hl.step(hlPos);
            steps++;
        } while (hlPos != 0 || steps == 0);
    }
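
    // removeDuplicatesInSegment() below walks every hash lookup slot of the tier and
    // resolves the stored key through the map's own query context; a slot whose key
    // resolves to a different entry address is a duplicate left over from corruption,
    // and is removed. The iteration deliberately starts from an empty slot: in a
    // linear-probing table, shift-deleting removal never moves entries across an
    // empty slot, so surviving entries cannot be shifted past the loop's start
    // boundary and be skipped or visited twice.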
    public void removeDuplicatesInSegment(
            ChronicleHashCorruption.Listener corruptionListener,
            ChronicleHashCorruptionImpl corruption) {
        long startHlPos = 0L;
        VanillaChronicleMap<?, ?, ?> m = mh.m();
        CompactOffHeapLinearHashTable hashLookup = m.hashLookup;
        long currentTierBaseAddr = s.tierBaseAddr;
        while (!hashLookup.empty(hashLookup.readEntry(currentTierBaseAddr, startHlPos))) {
            startHlPos = hashLookup.step(startHlPos);
        }
        long hlPos = startHlPos;
        int steps = 0;
        long entries = 0;
        tierIteration:
        do {
            hlPos = hashLookup.step(hlPos);
            steps++;
            long entry = hashLookup.readEntry(currentTierBaseAddr, hlPos);
            if (!hashLookup.empty(entry)) {
                e.readExistingEntry(hashLookup.value(entry));
                Data key = e.key();
                try (ExternalMapQueryContext<?, ?, ?> c = m.queryContext(key)) {
                    MapEntry<?, ?> entry2 = c.entry();
                    Data<?> key2 = ((MapEntry) c).key();
                    long keyAddress = key.bytes().address(key.offset());
                    long key2Address = key2.bytes().address(key2.offset());
                    if (key2Address != keyAddress) {
                        report(corruptionListener, corruption, s.segmentIndex, () ->
                                format("entries with duplicate key {} in segment {}: " +
                                                "with values {} and {}, removing the latter",
                                        key, c.segmentIndex(),
                                        entry2 != null ? ((MapEntry) c).value() : "<deleted>",
                                        !e.entryDeleted() ? e.value() : "<deleted>")
                        );
                        if (hashLookup.remove(currentTierBaseAddr, hlPos) != hlPos) {
                            hlPos = hashLookup.stepBack(hlPos);
                            steps--;
                        }
                        continue tierIteration;
                    }
                }
                entries++;
            }
            // The `steps == 0` condition, together with the `steps` updates in the loop
            // body, fixes the bug where shift deletion occurs on the first entry of the
            // tier and hlPos becomes equal to the start pos after visiting only a single
            // entry, rather than after making the whole loop.
        } while (hlPos != startHlPos || steps == 0);
        recoverTierEntriesCounter(entries, corruptionListener, corruption);
        recoverLowestPossibleFreeChunkTiered(corruptionListener, corruption);
    }

    private void recoverTierEntriesCounter(
            long entries,
            ChronicleHashCorruption.Listener corruptionListener,
            ChronicleHashCorruptionImpl corruption) {
        if (s.tierEntries() != entries) {
            report(corruptionListener, corruption, s.segmentIndex, () ->
                    format("Wrong number of entries counter for tier with index {}, " +
                                    "stored: {}, should be: {}",
                            s.tierIndex, s.tierEntries(), entries)
            );
            s.tierEntries(entries);
        }
    }

    private void recoverLowestPossibleFreeChunkTiered(
            ChronicleHashCorruption.Listener corruptionListener,
            ChronicleHashCorruptionImpl corruption) {
        long lowestFreeChunk = s.freeList.nextClearBit(0);
        if (lowestFreeChunk == -1)
            lowestFreeChunk = mh.m().actualChunksPerSegmentTier;
        if (s.lowestPossiblyFreeChunk() != lowestFreeChunk) {
            long finalLowestFreeChunk = lowestFreeChunk;
            report(corruptionListener, corruption, s.segmentIndex, () ->
                    format("wrong lowest free chunk for tier with index {}, " +
                                    "stored: {}, should be: {}",
                            s.tierIndex, s.lowestPossiblyFreeChunk(), finalLowestFreeChunk)
            );
            s.lowestPossiblyFreeChunk(lowestFreeChunk);
        }
    }
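
    // checkEntry() below validates a single entry referenced from the hash lookup:
    // the entry position must be within the tier, the key must be readable and must
    // fit into the segment, the segment index and search key recomputed from the
    // key's hash code must match the stored ones, the value size and checksum must
    // be consistent, and the entry must not overlap chunks already claimed by
    // another entry. On success it returns the segment index derived from the key
    // (or confirms the expected one); on any failed check it returns -1.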
    private int checkEntry(
            long searchKey,
            long entryPos,
            int segmentIndex,
            ChronicleHashCorruption.Listener corruptionListener,
            ChronicleHashCorruptionImpl corruption) {
        VanillaChronicleHash<?, ?, ?, ?> h = mh.h();
        if (entryPos < 0 || entryPos >= h.actualChunksPerSegmentTier) {
            report(corruptionListener, corruption, segmentIndex, () ->
                    format("Entry pos is out of range: {}, should be 0-{}",
                            entryPos, h.actualChunksPerSegmentTier - 1)
            );
            return -1;
        }

        try {
            e.readExistingEntry(entryPos);
        } catch (Exception ex) {
            reportException(corruptionListener, corruption, segmentIndex,
                    () -> "Exception while reading entry key size", ex);
            return -1;
        }

        if (e.keyEnd() > s.segmentBytes.capacity()) {
            report(corruptionListener, corruption, segmentIndex, () ->
                    format("Wrong key size: {}", e.keySize)
            );
            return -1;
        }

        long keyHashCode = khc.keyHashCode();
        int segmentIndexFromKey = h.hashSplitting.segmentIndex(keyHashCode);
        if (segmentIndexFromKey < 0 || segmentIndexFromKey >= h.actualSegments) {
            report(corruptionListener, corruption, segmentIndex, () ->
                    format("Segment index from the entry key hash code is out of range: {}, " +
                                    "should be 0-{}, entry key: {}",
                            segmentIndexFromKey, h.actualSegments - 1, e.key())
            );
            return -1;
        }

        long segmentHashFromKey = h.hashSplitting.segmentHash(keyHashCode);
        long searchKeyFromKey = h.hashLookup.maskUnsetKey(segmentHashFromKey);
        if (searchKey != searchKeyFromKey) {
            report(corruptionListener, corruption, segmentIndex, () ->
                    format("HashLookup searchKey: {}, HashLookup searchKey " +
                                    "from the entry key hash code: {}, entry key: {}, " +
                                    "entry pos: {}",
                            searchKey, searchKeyFromKey, e.key(), entryPos)
            );
            return -1;
        }

        try {
            // e.entryEnd() implicitly reads the value size, which has to be computed here
            long entryAndChecksumEnd = e.entryEnd() + e.checksumStrategy.extraEntryBytes();
            if (entryAndChecksumEnd > s.segmentBytes.capacity()) {
                report(corruptionListener, corruption, segmentIndex, () ->
                        format("Wrong value size: {}, key: {}", e.valueSize, e.key())
                );
                return -1;
            }
        } catch (Exception ex) {
            reportException(corruptionListener, corruption, segmentIndex,
                    () -> "Exception while reading entry value size, key: " + e.key(), ex);
            return -1;
        }

        int storedChecksum = e.checksumStrategy.storedChecksum();
        int checksumFromEntry = e.checksumStrategy.computeChecksum();
        if (storedChecksum != checksumFromEntry) {
            report(corruptionListener, corruption, segmentIndex, () ->
                    format("Checksum doesn't match, stored: {}, should be from " +
                                    "the entry bytes: {}, key: {}, value: {}",
                            storedChecksum, checksumFromEntry, e.key(), e.value())
            );
            return -1;
        }

        if (!s.freeList.isRangeClear(entryPos, entryPos + e.entrySizeInChunks)) {
            report(corruptionListener, corruption, segmentIndex, () ->
                    format("Overlapping entry: positions {}-{}, key: {}, value: {}",
                            entryPos, entryPos + e.entrySizeInChunks - 1, e.key(), e.value())
            );
            return -1;
        }

        if (segmentIndex < 0) {
            return segmentIndexFromKey;
        } else {
            if (segmentIndex != segmentIndexFromKey) {
                report(corruptionListener, corruption, segmentIndex, () ->
                        format("Expected segment index: {}, segment index from the entry " +
                                        "key: {}, key: {}, value: {}",
                                segmentIndex, segmentIndexFromKey, e.key(), e.value())
                );
                return -1;
            } else {
                return segmentIndex;
            }
        }
    }
}