/*
* Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.map.impl.mapstore.writebehind;
import com.hazelcast.config.MapStoreConfig;
import com.hazelcast.map.impl.MapServiceContext;
import com.hazelcast.map.impl.PartitionContainer;
import com.hazelcast.map.impl.mapstore.MapDataStore;
import com.hazelcast.map.impl.mapstore.MapStoreContext;
import com.hazelcast.map.impl.mapstore.writebehind.entry.DelayedEntry;
import com.hazelcast.map.impl.recordstore.RecordStore;
import com.hazelcast.spi.ExecutionService;
import com.hazelcast.spi.NodeEngine;
import com.hazelcast.spi.partition.IPartition;
import com.hazelcast.spi.partition.IPartitionService;
import com.hazelcast.spi.properties.GroupProperty;
import com.hazelcast.spi.properties.HazelcastProperties;
import com.hazelcast.util.Clock;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import static com.hazelcast.util.CollectionUtil.isEmpty;
import static java.lang.Thread.currentThread;
import static java.util.concurrent.TimeUnit.SECONDS;
/**
 * When write-behind is enabled the work is offloaded to another thread than partition-operation-thread.
 * That thread uses this runnable task to process write-behind-queues. This task collects entries from
 * write behind queues and passes them to {@link #writeBehindProcessor}.
 * <p>
 * Only one {@link StoreWorker} task is created for a map on a member.
 * <p>
 * The task re-schedules itself on the {@link ExecutionService} after every run for as long as it is
 * {@link #start() started}; {@link #stop()} only prevents further re-scheduling.
 */
public class StoreWorker implements Runnable {

    private final String mapName;
    private final MapServiceContext mapServiceContext;
    private final IPartitionService partitionService;
    private final ExecutionService executionService;
    private final WriteBehindProcessor writeBehindProcessor;

    /**
     * Run on backup nodes after this interval. The value is read from
     * {@link GroupProperty#MAP_REPLICA_SCHEDULED_TASK_DELAY_SECONDS}.
     */
    private final long backupDelayMillis;

    /**
     * Configured write-delay of the map-store, converted to milliseconds.
     */
    private final long writeDelayMillis;

    private final int partitionCount;

    /**
     * Entries are fetched from write-behind-queues according to highestStoreTime. If an entry
     * has a store-time which is smaller than or equal to the highestStoreTime, it will be processed.
     *
     * @see #calculateHighestStoreTime
     */
    private long lastHighestStoreTime;

    /**
     * {@code true} between {@link #start()} and {@link #stop()}; checked after each run to decide
     * whether this task schedules itself again.
     */
    private volatile boolean running;

    /**
     * Creates a worker for the map described by the given context.
     *
     * @param mapStoreContext      supplies the map name, the map-service context and the map-store config.
     * @param writeBehindProcessor processor which receives the entries collected from owned partitions.
     */
    public StoreWorker(MapStoreContext mapStoreContext, WriteBehindProcessor writeBehindProcessor) {
        this.mapName = mapStoreContext.getMapName();
        this.mapServiceContext = mapStoreContext.getMapServiceContext();
        NodeEngine nodeEngine = mapServiceContext.getNodeEngine();
        this.partitionService = nodeEngine.getPartitionService();
        this.executionService = nodeEngine.getExecutionService();
        this.writeBehindProcessor = writeBehindProcessor;
        this.backupDelayMillis = getReplicaWaitTimeMillis();
        this.lastHighestStoreTime = Clock.currentTimeMillis();
        this.writeDelayMillis = SECONDS.toMillis(getWriteDelaySeconds(mapStoreContext));
        this.partitionCount = partitionService.getPartitionCount();
    }

    /**
     * Marks this worker as running and schedules its first run. Calling it on an already
     * running worker is a no-op.
     */
    public synchronized void start() {
        if (running) {
            return;
        }
        running = true;
        schedule();
    }

    /**
     * Clears the running flag so that this task does not schedule itself again.
     * A run which is already scheduled or in progress is not cancelled here.
     */
    public synchronized void stop() {
        running = false;
    }

    /**
     * Processes the write-behind-queues once and, if still running, schedules the next run.
     * Re-scheduling happens in a {@code finally} block so a failing run does not end the cycle.
     */
    @Override
    public void run() {
        try {
            runInternal();
        } finally {
            if (running) {
                schedule();
            }
        }
    }

    /**
     * Schedules this task on the execution service with a delay of one second.
     */
    private void schedule() {
        executionService.schedule(this, 1, SECONDS);
    }

    /**
     * Walks over all partitions, collects processable entries of owned and backup partitions
     * into separate lists, then processes owned entries through {@link #writeBehindProcessor}
     * and backup entries through {@link #doInBackup}. Finally calls {@link #notifyFlush()}.
     */
    private void runInternal() {
        final long now = Clock.currentTimeMillis();
        // if this node is the owner of a partition, we use this criteria time.
        final long ownerHighestStoreTime = calculateHighestStoreTime(lastHighestStoreTime, now);
        // if this node is the backup of a partition, we use this criteria time because backups are processed after delay.
        final long backupHighestStoreTime = ownerHighestStoreTime - backupDelayMillis;

        lastHighestStoreTime = ownerHighestStoreTime;

        // lists are created lazily, only when there is at least one non-empty queue of that kind.
        List<DelayedEntry> ownersList = null;
        List<DelayedEntry> backupsList = null;

        for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
            // stop collecting when interrupted; entries collected so far are still handled below.
            if (currentThread().isInterrupted()) {
                break;
            }

            RecordStore recordStore = getRecordStoreOrNull(mapName, partitionId);
            if (!hasEntryInWriteBehindQueue(recordStore)) {
                continue;
            }

            if (isPartitionLocal(partitionId)) {
                ownersList = initListIfNull(ownersList, partitionCount);
                selectEntriesToStore(recordStore, ownersList, ownerHighestStoreTime);
            } else {
                backupsList = initListIfNull(backupsList, partitionCount);
                selectEntriesToStore(recordStore, backupsList, backupHighestStoreTime);
            }
        }

        if (!isEmpty(ownersList)) {
            Map<Integer, List<DelayedEntry>> failuresPerPartition = writeBehindProcessor.process(ownersList);
            // first drop everything that was handed to the processor, then put the failed ones back.
            removeFinishedStoreOperationsFromQueues(mapName, ownersList);
            reAddFailedStoreOperationsToQueues(mapName, failuresPerPartition);
        }

        if (!isEmpty(backupsList)) {
            doInBackup(backupsList);
        }

        notifyFlush();
    }

    /**
     * Returns the given list, or a new {@link ArrayList} with the given initial capacity when it is {@code null}.
     *
     * @param list     list to return if not {@code null}.
     * @param capacity initial capacity of the list created when {@code list} is {@code null}.
     * @return a non-null list.
     */
    private static List<DelayedEntry> initListIfNull(List<DelayedEntry> list, int capacity) {
        if (list == null) {
            list = new ArrayList<DelayedEntry>(capacity);
        }
        return list;
    }

    /**
     * Calculates highestStoreTime which is used to select processable entries from write-behind-queues.
     * Entries which have store-times smaller than or equal to highestStoreTime will be processed.
     * <p>
     * The value only advances to {@code now} when at least {@link #writeDelayMillis} has passed since
     * the last calculated value; otherwise the last value is returned unchanged.
     *
     * @param lastHighestStoreTime last calculated highest store time.
     * @param now                  now in millis
     * @return highestStoreTime in millis.
     */
    private long calculateHighestStoreTime(long lastHighestStoreTime, long now) {
        return now >= lastHighestStoreTime + writeDelayMillis ? now : lastHighestStoreTime;
    }

    /**
     * @param recordStore record-store to check, may be {@code null}.
     * @return {@code true} if the record-store exists and its write-behind-queue has at least one entry.
     */
    private boolean hasEntryInWriteBehindQueue(RecordStore recordStore) {
        if (recordStore == null) {
            return false;
        }
        return getWriteBehindQueue(recordStore).size() != 0;
    }

    /**
     * Calls {@code notifyFlush()} on the write-behind-store of every existing record-store of this map.
     */
    private void notifyFlush() {
        for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
            RecordStore recordStore = getRecordStoreOrNull(mapName, partitionId);
            if (recordStore != null) {
                getWriteBehindStore(recordStore).notifyFlush();
            }
        }
    }

    /**
     * @param partitionId ID of the partition to check.
     * @return the result of {@link IPartition#isLocal()} for the given partition.
     */
    private boolean isPartitionLocal(int partitionId) {
        IPartition partition = partitionService.getPartition(partitionId, false);
        return partition.isLocal();
    }

    /**
     * Collects the processable entries of the given record-store's write-behind-queue into {@code entries}.
     *
     * @param recordStore      record-store whose queue is inspected.
     * @param entries          collection which receives the selected entries.
     * @param highestStoreTime store-time upper bound (inclusive) used when no flush sequence is set.
     */
    private void selectEntriesToStore(RecordStore recordStore, List<DelayedEntry> entries, long highestStoreTime) {
        WriteBehindQueue<DelayedEntry> queue = getWriteBehindQueue(recordStore);
        long nextSequenceToFlush = getSequenceToFlush(recordStore);

        filterWriteBehindQueue(highestStoreTime, nextSequenceToFlush, entries, queue);
    }

    /**
     * Filters the queue into {@code collection}. When {@code sequence} is positive, entries are selected
     * by sequence ({@code entry.getSequence() <= sequence}); otherwise they are selected by store-time
     * ({@code entry.getStoreTime() <= highestStoreTime}).
     *
     * @param highestStoreTime inclusive store-time bound, used only when {@code sequence <= 0}.
     * @param sequence         inclusive sequence bound, used only when positive.
     * @param collection       collection passed to the queue to receive the matching entries.
     * @param queue            queue to filter.
     */
    private void filterWriteBehindQueue(final long highestStoreTime, final long sequence, Collection<DelayedEntry> collection,
                                        WriteBehindQueue<DelayedEntry> queue) {
        if (sequence > 0) {
            queue.filter(new IPredicate<DelayedEntry>() {
                @Override
                public boolean test(DelayedEntry delayedEntry) {
                    return delayedEntry.getSequence() <= sequence;
                }
            }, collection);
        } else {
            queue.filter(new IPredicate<DelayedEntry>() {
                @Override
                public boolean test(DelayedEntry delayedEntry) {
                    return delayedEntry.getStoreTime() <= highestStoreTime;
                }
            }, collection);
        }
    }

    /**
     * Removes the first occurrence of each given entry from the write-behind-queue of its partition.
     * Entries whose record-store no longer exists are skipped.
     *
     * @param mapName name of the map owning the queues.
     * @param entries entries to remove.
     */
    private void removeFinishedStoreOperationsFromQueues(String mapName, List<DelayedEntry> entries) {
        for (DelayedEntry entry : entries) {
            RecordStore recordStore = getRecordStoreOrNull(mapName, entry.getPartitionId());
            if (recordStore != null) {
                getWriteBehindQueue(recordStore).removeFirstOccurrence(entry);
            }
        }
    }

    /**
     * Puts failed entries back to the head of the write-behind-queue of their partition.
     * Partitions with no failures or without an existing record-store are skipped.
     *
     * @param mapName              name of the map owning the queues.
     * @param failuresPerPartition failed entries keyed by partition ID.
     */
    private void reAddFailedStoreOperationsToQueues(String mapName, Map<Integer, List<DelayedEntry>> failuresPerPartition) {
        if (failuresPerPartition.isEmpty()) {
            return;
        }
        for (Map.Entry<Integer, List<DelayedEntry>> entry : failuresPerPartition.entrySet()) {
            // read the value from the entry itself instead of doing a second map lookup.
            List<DelayedEntry> failures = entry.getValue();
            if (isEmpty(failures)) {
                continue;
            }
            RecordStore recordStore = getRecordStoreOrNull(mapName, entry.getKey());
            if (recordStore == null) {
                continue;
            }
            getWriteBehindQueue(recordStore).addFirst(failures);
        }
    }

    /**
     * Process write-behind queues on backup partitions. It is a fake processing and
     * it only removes entries from queues and does not persist any of them.
     * Before- and after-store listeners are still called around the removal.
     *
     * @param delayedEntries entries to be processed.
     */
    private void doInBackup(List<DelayedEntry> delayedEntries) {
        writeBehindProcessor.callBeforeStoreListeners(delayedEntries);
        removeFinishedStoreOperationsFromQueues(mapName, delayedEntries);
        writeBehindProcessor.callAfterStoreListeners(delayedEntries);
    }

    /**
     * @return value of {@link GroupProperty#MAP_REPLICA_SCHEDULED_TASK_DELAY_SECONDS} in milliseconds.
     */
    private long getReplicaWaitTimeMillis() {
        HazelcastProperties hazelcastProperties = mapServiceContext.getNodeEngine().getProperties();
        return hazelcastProperties.getMillis(GroupProperty.MAP_REPLICA_SCHEDULED_TASK_DELAY_SECONDS);
    }

    /**
     * @param mapName     name of the map.
     * @param partitionId ID of the partition.
     * @return the existing record-store of the map in the given partition, as returned by
     * {@code PartitionContainer#getExistingRecordStore}; callers treat {@code null} as "no record-store".
     */
    private RecordStore getRecordStoreOrNull(String mapName, int partitionId) {
        PartitionContainer partitionContainer = mapServiceContext.getPartitionContainer(partitionId);
        return partitionContainer.getExistingRecordStore(mapName);
    }

    /**
     * Returns the map-data-store of the given record-store cast to {@link WriteBehindStore}.
     * Single place for the cast which every queue/sequence/flush accessor of this class relies on.
     *
     * @param recordStore a non-null record-store.
     * @return the write-behind-store of the record-store.
     */
    private WriteBehindStore getWriteBehindStore(RecordStore recordStore) {
        MapDataStore mapDataStore = recordStore.getMapDataStore();
        return (WriteBehindStore) mapDataStore;
    }

    /**
     * @param recordStore a non-null record-store.
     * @return the write-behind-queue of the record-store.
     */
    private WriteBehindQueue<DelayedEntry> getWriteBehindQueue(RecordStore recordStore) {
        return getWriteBehindStore(recordStore).getWriteBehindQueue();
    }

    /**
     * @param recordStore a non-null record-store.
     * @return the sequence reported by {@code WriteBehindStore#getSequenceToFlush()}.
     */
    private long getSequenceToFlush(RecordStore recordStore) {
        return getWriteBehindStore(recordStore).getSequenceToFlush();
    }

    /**
     * @param mapStoreContext context giving access to the map-store config.
     * @return configured write-delay in seconds.
     */
    private static int getWriteDelaySeconds(MapStoreContext mapStoreContext) {
        MapStoreConfig mapStoreConfig = mapStoreContext.getMapStoreConfig();
        return mapStoreConfig.getWriteDelaySeconds();
    }

    @Override
    public String toString() {
        return "StoreWorker{" + "mapName='" + mapName + "'}";
    }
}