package io.blobkeeper.cluster.service;
/*
* Copyright (C) 2015 by Denis M. Gabaydulin
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.util.concurrent.Striped;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import io.blobkeeper.cluster.configuration.ClusterPropertiesConfiguration;
import io.blobkeeper.cluster.domain.*;
import io.blobkeeper.cluster.util.ClusterUtils;
import io.blobkeeper.cluster.util.ReplicationStatistic;
import io.blobkeeper.file.service.DiskService;
import io.blobkeeper.file.service.PartitionService;
import io.blobkeeper.index.domain.Partition;
import org.jetbrains.annotations.NotNull;
import org.jgroups.JChannel;
import org.jgroups.Message;
import org.joda.time.DateTime;
import org.joda.time.DateTimeConstants;
import org.joda.time.DateTimeZone;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.*;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Stream;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Iterables.size;
import static com.google.common.util.concurrent.Striped.semaphore;
import static io.blobkeeper.cluster.domain.Command.REPLICATION_REQUEST;
import static org.joda.time.DateTime.now;
import static org.joda.time.DateTimeZone.UTC;
@Singleton
public class RepairServiceImpl implements RepairService {
private static final Logger log = LoggerFactory.getLogger(RepairServiceImpl.class);
@Inject
private PartitionService partitionService;
@Inject
private DiskService diskService;
@Inject
private ClusterMembershipService membershipService;
@Inject
private ClusterUtils clusterUtils;
@Inject
private ClusterPropertiesConfiguration propertiesConfiguration;
private final Striped<Semaphore> semaphores = semaphore(16, 1);
private final ScheduledExecutorService replicationTaskExecutor =
Executors.newScheduledThreadPool(
32,
new ThreadFactoryBuilder()
.setDaemon(true)
.setNameFormat("RepairWorker-%d")
.build()
);
public void init() {
replicationTaskExecutor.scheduleWithFixedDelay(
new RepairTask(),
getInitialDelaySeconds(),
DateTimeConstants.SECONDS_PER_DAY,
TimeUnit.SECONDS
);
}
@Override
public void repair(boolean allPartitions) {
diskService.getDisks().forEach(disk -> repair(disk, allPartitions));
}
@Override
public void repairActive() {
repair(false);
}
@Override
public boolean isRepairInProgress() {
List<Integer> disks = diskService.getDisks();
return size(semaphores.bulkGet(disks)) < disks.size();
}
/**
* Repairs files on the self node.
* <p>
* Before the repair has been started, it compares the files between remote node and target node.
* If the files are not equal and remote node has correct files,
* the target node requests a replication of corrupted file from the remote node.
* <p>
* The replication process is pretty simple.
* The remote node sends the request blob (block by block) to the target node.
*/
@Override
public void repair(int disk, boolean allPartitions) {
Semaphore semaphore = semaphores.get(disk);
try {
boolean acquired = semaphore.tryAcquire(5, TimeUnit.SECONDS);
if (!acquired) {
log.info("Repairing already in progress");
return;
}
log.info("Repair of disk {} started", disk);
membershipService.getMaster().ifPresent(
master -> {
Partition active = partitionService.getActivePartition(disk);
checkNotNull(active, "Active partition is required!");
log.info("Replication starts, master node is {}", master);
ReplicationTask replicationTask = new ReplicationTask(disk, allPartitions);
CompletableFuture.<Void>runAsync(replicationTask, replicationTaskExecutor)
.thenAcceptAsync(aVoid -> {
log.info("Repair of disk {} finished", disk);
semaphore.release();
}, replicationTaskExecutor)
.exceptionally(throwable -> {
log.error("Can't repair cluster", throwable);
semaphore.release();
return null;
});
}
);
} catch (Exception e) {
log.error("Can't repair cluster", e);
semaphore.release();
}
}
private Map<Integer, MerkleTreeInfo> getExpectedData(int disk) {
List<Partition> partitions = partitionService.getPartitions(disk);
return clusterUtils.getExpectedTrees(disk, partitions);
}
private int getInitialDelaySeconds() {
int repairHour = propertiesConfiguration.getRepairTimeHour();
int currentHour = now(UTC).getHourOfDay();
int delay = 0;
if (repairHour < currentHour) {
delay = 24 - currentHour - repairHour;
}
if (repairHour > currentHour) {
delay = repairHour - currentHour;
}
return delay * 60 * 60 + 30;
}
private class RepairTask implements Runnable {
@Override
public void run() {
try {
log.info("Repair all partitions started");
repair(true);
log.info("Repair all partitions finished");
} catch (Exception e) {
log.error("Can't start periodic repair", e);
}
}
}
private class ReplicationTask implements Runnable {
private final int disk;
private final Partition active;
private final boolean allPartitions;
ReplicationTask(int disk, boolean allPartitions) {
this.disk = disk;
this.active = partitionService.getActivePartition(disk);
this.allPartitions = allPartitions;
}
@Override
public void run() {
log.debug("Getting file list");
// TODO: add logging for filtered blobs
// log.debug("Replication of {} is not required for node {}", replicatingBlob, membershipService.getSelfNode());
Map<Integer, MerkleTreeInfo> expectedData;
if (allPartitions) {
expectedData = getExpectedData(disk);
} else {
expectedData = ImmutableMap.of();
}
try {
Stream.concat(
expectedData.values()
.stream()
.map(new TreeToRepairRequest()),
// active partition always replicates
Stream.of(getForActive())
).forEach(new DifferenceConsumer());
} catch (Exception e) {
log.error("Can't replicate file", e);
throw new ReplicationServiceException(e);
}
}
private RepairRequest getForActive() {
DifferenceInfo differenceInfo = new DifferenceInfo();
differenceInfo.setDisk(active.getDisk());
differenceInfo.setPartition(active.getId());
differenceInfo.setCompletelyDifferent(true);
return new RepairRequest.Builder()
.diff(differenceInfo)
.withNode(membershipService.getNodeForRepair(true))
.build();
}
private class DifferenceConsumer implements Consumer<RepairRequest> {
@Override
public void accept(RepairRequest repairRequest) {
if (!repairRequest.getRepairNode().isPresent()) {
log.error("No repair node for {}", repairRequest);
return;
}
if (repairRequest.getDifferenceInfo().isNoDiff()) {
log.info("No diff {}", repairRequest.getDifferenceInfo());
return;
}
JChannel channel = membershipService.getChannel();
log.info("Replication request sending for file {} to node {}", repairRequest.getDifferenceInfo(), repairRequest.getRepairNode().get());
try {
Message message = ClusterUtils.createMessage(
membershipService.getSelfNode().getAddress(),
repairRequest.getRepairNode().get().getAddress(),
repairRequest.getDifferenceInfo(),
new CustomMessageHeader(REPLICATION_REQUEST)
);
channel.send(message);
} catch (Exception e) {
log.error("Can't request replication for file {}", repairRequest.getDifferenceInfo(), e);
}
}
}
private class TreeToRepairRequest implements Function<MerkleTreeInfo, RepairRequest> {
/**
* @return non-empty difference, in case of trees are different between remote and local host
* Remote tree and expected tree (from blob index) must be equal.
*/
@Override
public RepairRequest apply(MerkleTreeInfo expected) {
boolean isActive = expected.getPartition() == active.getId();
boolean isSelfNodeMaster = membershipService.isMaster();
Optional<Node> remoteNode = membershipService.getNodeForRepair(isActive);
log.info("Disk, partition is {}, {}; Remote node is {}", expected.getDisk(), expected.getPartition(), remoteNode);
RepairRequest.Builder requestBuilder = new RepairRequest.Builder()
.withNode(remoteNode);
DifferenceInfo noDiff = new DifferenceInfo();
noDiff.setDifference(ImmutableList.of());
noDiff.setDisk(expected.getDisk());
noDiff.setPartition(expected.getPartition());
// FIXME: repair master?
// no need replicate active partition to master
if (isSelfNodeMaster && isActive) {
log.info("Master {} knows more about active partition {}, request node {}", membershipService.getMaster(), active, remoteNode);
return requestBuilder.diff(noDiff)
.build();
}
boolean isDstNodeMaster = remoteNode.equals(membershipService.getMaster());
// check active node only on master
if (!isDstNodeMaster && isActive) {
log.info("Only master {} knows about active partition {}, request node {}", membershipService.getMaster(), active, remoteNode);
return requestBuilder.diff(noDiff)
.build();
}
// TODO: add cache?
DifferenceInfo local = membershipService.getDifference(expected);
if (local != null && local.getDifference().isEmpty()) {
log.debug("Local file tree is equals to the expected for file {}", local);
return requestBuilder.diff(noDiff)
.build();
}
return requestBuilder.diff(local)
.build();
}
}
}
private static class RepairRequest {
private final DifferenceInfo differenceInfo;
private final Optional<Node> repairNode;
RepairRequest(Builder builder) {
this.differenceInfo = builder.differenceInfo;
this.repairNode = builder.repairNode;
}
DifferenceInfo getDifferenceInfo() {
return differenceInfo;
}
public Optional<Node> getRepairNode() {
return repairNode;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("differenceInfo", differenceInfo)
.add("repairNode", repairNode)
.toString();
}
static class Builder {
private DifferenceInfo differenceInfo;
private Optional<Node> repairNode;
Builder diff(@NotNull DifferenceInfo differenceInfo) {
this.differenceInfo = differenceInfo;
return this;
}
public Builder withNode(@NotNull Optional<Node> repairNode) {
this.repairNode = repairNode;
return this;
}
public RepairRequest build() {
return new RepairRequest(this);
}
}
}
}