/* * Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.hazelcast.map.impl.mapstore.writebehind; import com.hazelcast.config.Config; import com.hazelcast.config.InMemoryFormat; import com.hazelcast.config.MapConfig; import com.hazelcast.config.MapStoreConfig; import com.hazelcast.core.IMap; import com.hazelcast.map.impl.MapServiceContext; import com.hazelcast.map.impl.mapstore.AbstractMapDataStore; import com.hazelcast.map.impl.mapstore.MapStoreContext; import com.hazelcast.map.impl.mapstore.writebehind.entry.DelayedEntries; import com.hazelcast.map.impl.mapstore.writebehind.entry.DelayedEntry; import com.hazelcast.map.impl.operation.NotifyMapFlushOperation; import com.hazelcast.nio.serialization.Data; import com.hazelcast.spi.NodeEngine; import com.hazelcast.spi.Operation; import com.hazelcast.spi.OperationService; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Queue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicLong; import static com.hazelcast.config.InMemoryFormat.NATIVE; import static com.hazelcast.config.InMemoryFormat.OBJECT; import static com.hazelcast.map.impl.MapService.SERVICE_NAME; import static com.hazelcast.spi.impl.OperationResponseHandlerFactory.createEmptyResponseHandler; /** * Write behind map data store implementation. * Created per every record-store. Only called from one thread. */ public class WriteBehindStore extends AbstractMapDataStore<Data, Object> { /** * Represents a transient {@link DelayedEntry}. * A transient entry can be added via {@link com.hazelcast.core.IMap#putTransient}. */ private static final DelayedEntry TRANSIENT = DelayedEntries.emptyDelayedEntry(); /** * Sequence number of store operations. */ private final AtomicLong sequence = new AtomicLong(0); /** * Holds the sequences according to insertion order at which {@link IMap#flush()} was called. * <p/> * Upon end of a store operation, these sequences are used to find whether there is any flush request * waiting for the stored sequence, and if there is any, {@link NotifyMapFlushOperation} is send to notify * {@link com.hazelcast.map.impl.operation.AwaitMapFlushOperation AwaitMapFlushOperation} * * @see WriteBehindStore#notifyFlush */ private final Queue<Sequence> flushSequences = new ConcurrentLinkedQueue<Sequence>(); /** * @see {@link com.hazelcast.config.MapStoreConfig#setWriteCoalescing(boolean)} */ private final boolean coalesce; /** * {@code stagingArea} is a temporary living space for evicted data if we are using a write-behind map store. * Every eviction triggers a map store flush, and in write-behind mode this flush operation * should not cause any inconsistencies, such as reading a stale value from map store. * To prevent reading stale values when the time of a non-existent key is requested, before loading it from map-store * we search for an evicted entry in this space. If the entry is not there, * we ask map store to load it. All read operations use this staging area * to return the last set value on a specific key, since there is a possibility that * {@link com.hazelcast.map.impl.mapstore.writebehind.WriteBehindQueue} may contain more than one waiting operations * on a specific key. * <p/> * This space is also used to control any waiting delete operations on a key or any transiently put entries to {@code IMap}. * Values of any transiently put entries should not be added to this area upon eviction, otherwise subsequent * {@code IMap#get} operations may return stale values. * <p/> * NOTE: In case of eviction we do not want to make a huge database load by flushing entries uncontrollably. * We also do not want to make duplicate map-store calls for a key. This is why we use the staging area instead of the * direct flushing option to map-store. */ private final ConcurrentMap<Data, DelayedEntry> stagingArea = new ConcurrentHashMap<Data, DelayedEntry>(); private final OperationService operationService; private final InMemoryFormat inMemoryFormat; private final NodeEngine nodeEngine; private final String mapName; private final int partitionId; private WriteBehindProcessor writeBehindProcessor; private WriteBehindQueue<DelayedEntry> writeBehindQueue; public WriteBehindStore(MapStoreContext mapStoreContext, int partitionId) { super(mapStoreContext.getMapStoreWrapper(), mapStoreContext.getMapServiceContext().getNodeEngine().getSerializationService()); MapStoreConfig mapStoreConfig = mapStoreContext.getMapStoreConfig(); this.partitionId = partitionId; this.inMemoryFormat = getInMemoryFormat(mapStoreContext); this.coalesce = mapStoreConfig.isWriteCoalescing(); this.mapName = mapStoreContext.getMapName(); this.nodeEngine = mapStoreContext.getMapServiceContext().getNodeEngine(); this.operationService = nodeEngine.getOperationService(); } @Override public Object add(Data key, Object value, long now) { // When using format InMemoryFormat.NATIVE, just copy key & value to heap. if (NATIVE == inMemoryFormat) { value = toData(value); key = toData(key); } // This note describes the problem when we want to persist all states of an entry (means write-coalescing is off) // by using both EntryProcessor + OBJECT in-memory-format: // // If in-memory-format is OBJECT, there is a possibility that a previous state of an entry can be overwritten // by a subsequent write operation while both are waiting in the write-behind-queue, this is because they are referencing // to the same entry-value. To prevent such a problem, we are taking snapshot of the value by serializing it, // this means an extra serialization and additional latency for operations like map#put but it is needed, // otherwise we can lost a state. if (!coalesce && OBJECT == inMemoryFormat) { value = toData(value); } DelayedEntry<Data, Object> delayedEntry = DelayedEntries.createDefault(key, value, now, partitionId); add(delayedEntry); return value; } public void add(DelayedEntry<Data, Object> delayedEntry) { writeBehindQueue.addLast(delayedEntry); stagingArea.put(delayedEntry.getKey(), delayedEntry); delayedEntry.setSequence(sequence.incrementAndGet()); } @Override public void addTransient(Data key, long now) { if (NATIVE == inMemoryFormat) { key = toData(key); } stagingArea.put(key, TRANSIENT); } @Override public Object addBackup(Data key, Object value, long time) { return add(key, value, time); } @Override public void remove(Data key, long now) { if (NATIVE == inMemoryFormat) { key = toData(key); } DelayedEntry<Data, Object> delayedEntry = DelayedEntries.createWithoutValue(key, now, partitionId); add(delayedEntry); } @Override public void removeBackup(Data key, long time) { remove(key, time); } @Override public void reset() { writeBehindQueue.clear(); stagingArea.clear(); sequence.set(0); flushSequences.clear(); } @Override public Object load(Data key) { DelayedEntry delayedEntry = getFromStagingArea(key); if (delayedEntry == null) { return getStore().load(toObject(key)); } return toObject(delayedEntry.getValue()); } @Override public Map loadAll(Collection keys) { if (keys == null || keys.isEmpty()) { return Collections.emptyMap(); } Map<Object, Object> map = new HashMap<Object, Object>(); Iterator iterator = keys.iterator(); while (iterator.hasNext()) { Object key = iterator.next(); Data dataKey = toData(key); DelayedEntry delayedEntry = getFromStagingArea(dataKey); if (delayedEntry != null) { Object value = delayedEntry.getValue(); if (value != null) { map.put(dataKey, toObject(value)); } iterator.remove(); } } map.putAll(super.loadAll(keys)); return map; } /** * * Used in {@link com.hazelcast.core.IMap#loadAll} calls. * If the write-behind map-store feature is enabled, some things may lead to possible data inconsistencies. * These are: * - calling evict/evictAll, * - calling remove, and * - not yet stored write-behind queue operations. * <p/> * With this method, we can be sure if a key can be loadable from map-store or not. * * @param key the key to query whether it is loadable or not. * @return <code>true</code> if loadable, false otherwise. */ @Override public boolean loadable(Data key) { if (NATIVE == inMemoryFormat) { key = toData(key); } return !writeBehindQueue.contains(DelayedEntries.createDefault(key, null, -1, -1)); } @Override public int notFinishedOperationsCount() { return writeBehindQueue.size(); } @Override public Object flush(Data key, Object value, boolean backup) { if (NATIVE == inMemoryFormat) { key = toData(key); value = toData(value); } DelayedEntry delayedEntry = stagingArea.get(key); if (delayedEntry == TRANSIENT) { stagingArea.remove(key); return null; } if (writeBehindQueue.size() == 0 || !writeBehindQueue.contains(DelayedEntries.createWithoutValue(key))) { return null; } addAndGetSequence(false); return value; } @Override public long softFlush() { int size = writeBehindQueue.size(); if (size == 0) { return 0; } return addAndGetSequence(true); } /** * @param fullFlush {@code true} if flush cause is {@link IMap#flush()}, * {@code false} for flushes caused by eviction. * @return last store operations sequence number */ private long addAndGetSequence(boolean fullFlush) { Sequence sequence = new Sequence(this.sequence.get(), fullFlush); flushSequences.add(sequence); return sequence.getSequence(); } @Override public void hardFlush() { if (writeBehindQueue.size() == 0) { return; } writeBehindProcessor.flush(writeBehindQueue); } public WriteBehindQueue<DelayedEntry> getWriteBehindQueue() { return writeBehindQueue; } public void setWriteBehindQueue(WriteBehindQueue<DelayedEntry> writeBehindQueue) { this.writeBehindQueue = writeBehindQueue; } public void setWriteBehindProcessor(WriteBehindProcessor writeBehindProcessor) { this.writeBehindProcessor = writeBehindProcessor; } public void setSequence(long newSequence) { this.sequence.set(newSequence); } public void notifyFlush() { long nextSequenceNumber = sequence.get() + 1; DelayedEntry firstEntry = writeBehindQueue.peek(); if (firstEntry == null) { if (!flushSequences.isEmpty()) { findAwaitingFlushesAndSendNotification(nextSequenceNumber); } } else { findAwaitingFlushesAndSendNotification(firstEntry.getSequence()); } } private void findAwaitingFlushesAndSendNotification(long lastSequenceInQueue) { final int maxIterationCount = 100; Iterator<Sequence> iterator = flushSequences.iterator(); int iterationCount = 0; while (iterator.hasNext()) { Sequence flushSequence = iterator.next(); if (flushSequence.getSequence() < lastSequenceInQueue) { iterator.remove(); executeNotifyOperation(flushSequence); } if (++iterationCount == maxIterationCount) { break; } } } private void executeNotifyOperation(Sequence flushSequence) { if (!flushSequence.isFullFlush() || !nodeEngine.getPartitionService().isPartitionOwner(partitionId)) { return; } Operation operation = new NotifyMapFlushOperation(mapName, flushSequence.getSequence()); operation.setServiceName(SERVICE_NAME) .setNodeEngine(nodeEngine) .setPartitionId(partitionId) .setCallerUuid(nodeEngine.getLocalMember().getUuid()) .setOperationResponseHandler(createEmptyResponseHandler()); operationService.execute(operation); } protected void removeFromStagingArea(DelayedEntry delayedEntry) { if (delayedEntry == null) { return; } Data key = (Data) delayedEntry.getKey(); stagingArea.remove(key, delayedEntry); } private DelayedEntry getFromStagingArea(Data key) { DelayedEntry delayedEntry = stagingArea.get(key); if (delayedEntry == null || delayedEntry == TRANSIENT) { return null; } return delayedEntry; } public Queue<Sequence> getFlushSequences() { return flushSequences; } public long getSequenceToFlush() { final int maxIterationCount = 100; Iterator<Sequence> iterator = flushSequences.iterator(); long sequenceNumber = 0L; int iterationCount = 0; while (iterator.hasNext()) { Sequence sequence = iterator.next(); sequenceNumber = sequence.getSequence(); if (++iterationCount == maxIterationCount) { break; } } return sequenceNumber; } public void setFlushSequences(Queue<Sequence> flushSequences) { this.flushSequences.addAll(flushSequences); } private static InMemoryFormat getInMemoryFormat(MapStoreContext mapStoreContext) { MapServiceContext mapServiceContext = mapStoreContext.getMapServiceContext(); NodeEngine nodeEngine = mapServiceContext.getNodeEngine(); Config config = nodeEngine.getConfig(); String mapName = mapStoreContext.getMapName(); MapConfig mapConfig = config.findMapConfig(mapName); return mapConfig.getInMemoryFormat(); } /** * The purpose of this class is to provide distinction * between flushes caused by eviction and {@link IMap#flush()} */ public static class Sequence { /** * Sequence of the store operation. */ private final long sequence; /** * When {@code true}, it means {@link IMap#flush()} was called at this sequence. */ private final boolean fullFlush; public Sequence(long sequence, boolean fullFlush) { this.sequence = sequence; this.fullFlush = fullFlush; } public long getSequence() { return sequence; } public boolean isFullFlush() { return fullFlush; } } }