package io.blobkeeper.cluster.service;
/*
* Copyright (C) 2015 by Denis M. Gabaydulin
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.google.common.collect.ImmutableList;
import com.google.common.collect.RangeMap;
import com.google.common.collect.TreeRangeMap;
import io.blobkeeper.cluster.configuration.ClusterPropertiesConfiguration;
import io.blobkeeper.cluster.domain.*;
import io.blobkeeper.cluster.util.ClusterUtils;
import io.blobkeeper.cluster.util.ReplicationStatistic;
import io.blobkeeper.common.util.LeafNode;
import io.blobkeeper.common.util.MerkleTree;
import io.blobkeeper.file.domain.File;
import io.blobkeeper.file.domain.ReplicationFile;
import io.blobkeeper.file.service.FileListService;
import io.blobkeeper.file.service.PartitionService;
import io.blobkeeper.file.util.FileUtils;
import io.blobkeeper.file.util.IndexEltOffsetComparator;
import io.blobkeeper.index.domain.IndexElt;
import io.blobkeeper.index.domain.Partition;
import io.blobkeeper.index.service.IndexService;
import io.blobkeeper.index.service.NoIndexRangeException;
import org.jetbrains.annotations.NotNull;
import org.jgroups.Address;
import org.jgroups.JChannel;
import org.jgroups.Message;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static com.google.common.base.Preconditions.checkNotNull;
import static io.blobkeeper.cluster.domain.Command.FILE;
import static io.blobkeeper.cluster.util.ClusterUtils.createMessage;
import static io.blobkeeper.index.domain.PartitionState.NEW;
import static java.lang.Thread.sleep;
import static java.util.Collections.sort;
import static java.util.concurrent.CompletableFuture.runAsync;
@Singleton
public class ReplicationClientServiceImpl implements ReplicationClientService {
private static final Logger log = LoggerFactory.getLogger(ReplicationClientServiceImpl.class);
@Inject
private ClusterMembershipService membershipService;
@Inject
private FileListService fileListService;
@Inject
private IndexService indexService;
@Inject
private ClusterPropertiesConfiguration configuration;
@Inject
private ClusterUtils clusterUtils;
@Inject
private PartitionService partitionService;
@Inject
private ReplicationStatistic replicationStatistic;
@Override
public void replicate(@NotNull ReplicationFile file) {
if (log.isTraceEnabled()) {
log.trace("Replicating file {}", file);
}
Optional<Node> masterNode = membershipService.getMaster();
checkNotNull(masterNode.isPresent(), "Master node is required!");
membershipService.getNodes()
.stream()
.filter(node -> !(node.equals(masterNode.get()) || node.equals(membershipService.getSelfNode())))
.forEach(node -> runAsync(() -> replicate(file, node.getAddress())));
}
@Override
public void replicate(@NotNull ReplicationFile file, @NotNull Address dst) {
JChannel channel = membershipService.getChannel();
log.trace("Replication packet sending for {}", dst);
try {
Message message = createMessage(
membershipService.getSelfNode().getAddress(),
dst,
file,
new CustomMessageHeader(FILE)
);
channel.send(message);
} catch (Exception e) {
log.error("Can't replicate file", e);
throw new ReplicationServiceException(e);
} finally {
replicationStatistic.onReplicationElt();
}
}
// TODO: prevent simultaneous replication of multiple disk partitions (add disk lock?)
@Override
public void replicate(@NotNull DifferenceInfo differenceInfo, @NotNull Address dst) {
replicationStatistic.onReplicationRequest();
Partition partition = partitionService.getById(differenceInfo.getDisk(), differenceInfo.getPartition());
if (!isReplicationAvailable(partition, differenceInfo)) {
return;
}
// TODO: calculate what types are different instead whole range
RangeMap<Long, LeafNode> nodes = TreeRangeMap.create();
differenceInfo.getDifference().stream()
.forEach(diff -> nodes.put(diff.getRange(), diff));
log.info("File will be synced {}, dst node {}", differenceInfo, dst);
File file = null;
int sentElts = 1;
try {
file = fileListService.getFile(differenceInfo.getDisk(), differenceInfo.getPartition());
if (null == file) {
log.error("Can't replicate blob file {}, dst node {}", differenceInfo, dst);
return;
}
List<IndexElt> elts = new ArrayList<>(indexService.getListByPartition(partition));
// sort it by offset, to read file consequentially
sort(elts, new IndexEltOffsetComparator());
for (IndexElt elt : elts) {
// not in diff
if (null == nodes.get(elt.getId()) && !differenceInfo.isCompletelyDifferent()) {
continue;
}
// pause to send new files
if (sentElts % configuration.getReplicationMaxFiles() == 0) {
sleep(configuration.getReplicationDelay());
}
ByteBuffer buffer;
try {
buffer = FileUtils.readFile(file, elt.getOffset(), elt.getLength());
} catch (Exception e) {
log.error("Can't read data for index {}, required {}", elt, elt.getLength(), e);
continue;
}
byte[] bufferBytes = new byte[buffer.remaining()];
buffer.get(bufferBytes);
if (bufferBytes.length < elt.getLength()) {
log.error("Can't replicate elt {}", elt);
continue;
}
ReplicationFile replicationFile = new ReplicationFile(elt.getDiskIndexElt(), bufferBytes);
try {
replicate(replicationFile, dst);
} catch (ReplicationServiceException e) {
log.error("Can't replicate file {}", elt, e);
}
}
} catch (Exception e) {
log.error("Can't replicate block {}", partition, e);
} finally {
if (null != file) {
try {
file.close();
} catch (Exception ignored) {
}
}
}
}
private boolean isReplicationAvailable(Partition partition, DifferenceInfo differenceInfo) {
return partition.getState() == NEW && (differenceInfo.isCompletelyDifferent() || isExpectedMerkleTree(partition));
}
private boolean isExpectedMerkleTree(@NotNull Partition partition) {
MerkleTreeInfo local;
try {
local = membershipService.getMerkleTreeInfo(
membershipService.getSelfNode().getAddress(),
partition.getDisk(),
partition.getId()
);
} catch (NoIndexRangeException e) {
log.info("No elements in index", e);
return false;
}
// no file
if (null == local) {
log.error("No file");
return false;
}
Map<Integer, MerkleTreeInfo> expectedData = clusterUtils.getExpectedTrees(partition.getDisk(), ImmutableList.of(partition));
MerkleTreeInfo treeInfo = expectedData.get(partition.getId());
if (null == treeInfo) {
log.error("No tree info (no index?)");
return false;
}
MerkleTree expectedTree = treeInfo.getTree();
boolean treeIsExpected = MerkleTree.difference(expectedTree, local.getTree()).isEmpty();
if (!treeIsExpected) {
log.error(
"Can't replicate file {}, tree on master node {} is not equals to the expected {}",
partition,
local.getTree(),
expectedTree
);
}
return treeIsExpected;
}
}