/* * Copyright 2016 The Simple File Server Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.sfs.nodes; import com.google.common.base.Optional; import com.google.common.collect.Iterables; import io.vertx.core.Vertx; import io.vertx.core.buffer.Buffer; import io.vertx.core.logging.Logger; import io.vertx.core.streams.ReadStream; import org.sfs.Server; import org.sfs.VertxContext; import org.sfs.filesystem.volume.DigestBlob; import org.sfs.io.BufferEndableWriteStream; import org.sfs.io.MultiEndableWriteStream; import org.sfs.io.PipedEndableWriteStream; import org.sfs.io.PipedReadStream; import org.sfs.rx.Defer; import org.sfs.rx.RxHelper; import org.sfs.util.MessageDigestFactory; import org.sfs.vo.TransientServiceDef; import rx.Observable; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.NavigableMap; import java.util.Set; import static com.google.common.collect.Iterables.toArray; import static io.vertx.core.logging.LoggerFactory.getLogger; import static java.util.Collections.singletonList; import static org.sfs.io.AsyncIO.pump; import static org.sfs.rx.RxHelper.combineSinglesDelayError; public class VolumeReplicaGroup { private static final Logger LOGGER = getLogger(VolumeReplicaGroup.class); private final VertxContext<Server> vertxContext; private int numberOfObjectCopies; private boolean allowSameNode = false; private Set<String> excludeVolumes; private ClusterInfo clusterInfo; public VolumeReplicaGroup(VertxContext<Server> vertxContext, int numberOfObjectCopies) { this.vertxContext = vertxContext; this.numberOfObjectCopies = numberOfObjectCopies; this.clusterInfo = vertxContext.verticle().getClusterInfo(); } public boolean isAllowSameNode() { return allowSameNode; } public int getNumberOfObjectCopies() { return numberOfObjectCopies; } public VolumeReplicaGroup setNumberOfObjectCopies(int numberOfObjectCopies) { this.numberOfObjectCopies = numberOfObjectCopies; return this; } public VolumeReplicaGroup setAllowSameNode(boolean allowSameNode) { this.allowSameNode = allowSameNode; return this; } public VolumeReplicaGroup setExcludeVolumeIds(Iterable<String> volumeIds) { if (excludeVolumes != null) { excludeVolumes.clear(); } else { excludeVolumes = new HashSet<>(); } Iterables.addAll(excludeVolumes, volumeIds); return this; } public int getQuorumNumber() { return (numberOfObjectCopies / 2) + 1; } public int getQuorumMinNumberOfCopies() { return numberOfObjectCopies > 0 ? Math.max(getQuorumNumber(), 1) : 1; } public Observable<List<DigestBlob>> consume(final long length, final MessageDigestFactory messageDigestFactories, ReadStream<Buffer> src) { return consume(length, singletonList(messageDigestFactories), src); } public Observable<List<DigestBlob>> consume(final long length, final Iterable<MessageDigestFactory> messageDigestFactories, ReadStream<Buffer> src) { return calculateNodeWriteStreamBlobs(length, toArray(messageDigestFactories, MessageDigestFactory.class)) .flatMap(nodeWriteStreamBlobs -> { int size = nodeWriteStreamBlobs.size(); List<Observable<DigestBlob>> oDigests = new ArrayList<>(size); List<BufferEndableWriteStream> writeStreams = new ArrayList<>(size); for (int i = 0; i < size; i++) { PipedReadStream readStream = new PipedReadStream(); PipedEndableWriteStream writeStream = new PipedEndableWriteStream(readStream); Observable<DigestBlob> oDigest = nodeWriteStreamBlobs.get(i).consume(readStream); oDigests.add(oDigest); writeStreams.add(writeStream); } MultiEndableWriteStream multiWriteStream = new MultiEndableWriteStream(writeStreams); Observable<Void> producer = pump(src, multiWriteStream).single(); Observable<List<DigestBlob>> consumer = Observable.mergeDelayError(oDigests) .toList() .single(); // the zip operator will not work here // since the subscriptions need to run // in parallel due to the pipe connections return combineSinglesDelayError( producer, consumer, (aVoid, response) -> response); }); } public Observable<List<ConnectedVolume>> getReplicaVolumesForWrite(List<ConnectedVolume> toIgnore, long requiredSpace, int numberOfReplicas, boolean allowSameNode, MessageDigestFactory... messageDigestFactories) { if (numberOfReplicas > 0) { return getVolumesForWrite(clusterInfo.getStartedVolumeIdByUseableSpace(), toIgnore, requiredSpace, numberOfReplicas, allowSameNode, messageDigestFactories); } return Defer.just(Collections.emptyList()); } protected Observable<List<NodeWriteStreamBlob>> calculateNodeWriteStreamBlobs(final long length, final MessageDigestFactory... messageDigestFactories) { int replicaQuorumNumber = getQuorumMinNumberOfCopies(); return getReplicaVolumesForWrite(Collections.emptyList(), length, numberOfObjectCopies, allowSameNode, messageDigestFactories) .doOnNext(targetReplicaVolumes -> checkFoundSufficientVolumes(targetReplicaVolumes.size(), replicaQuorumNumber, false)) .flatMap(Observable::from) .map(ConnectedVolume::getNodeWriteStreamBlob) .toList(); } protected void checkFoundSufficientVolumes(int actualMatches, int expectedMatches, boolean primary) { if (actualMatches < expectedMatches) { throw new InsufficientReplicaVolumesAvailableException(expectedMatches, actualMatches); } } protected Observable<List<ConnectedVolume>> getVolumesForWrite(NavigableMap<Long, Set<String>> volumesBySpace, List<ConnectedVolume> toSkip, long requiredSpace, int numberToCollect, boolean allowSameNode, MessageDigestFactory... messageDigestFactories) { if (volumesBySpace != null && numberToCollect > 0) { NavigableMap<Long, Set<String>> descendingMap = volumesBySpace.descendingMap(); List<ConnectedVolume> results = new ArrayList<>(numberToCollect); Set<String> seenNodes = new HashSet<>(); Set<String> seenVolumes = new HashSet<>(); for (ConnectedVolume primaryTargetVolume : toSkip) { seenNodes.add(primaryTargetVolume.getNodeId()); seenVolumes.add(primaryTargetVolume.getVolumeId()); } if (excludeVolumes != null) { seenVolumes.addAll(excludeVolumes); } Vertx vertx = vertxContext.vertx(); return RxHelper.iterate(vertx, descendingMap.entrySet(), entry -> { long useableSpace = entry.getKey(); if (useableSpace * 0.90 >= requiredSpace) { Set<String> volumeIds = entry.getValue(); return RxHelper.iterate(vertx, volumeIds, volumeId -> { if (seenVolumes.add(volumeId)) { Optional<TransientServiceDef> oServiceDef = clusterInfo.getServiceDefForVolume(volumeId); if (oServiceDef.isPresent()) { TransientServiceDef serviceDef = oServiceDef.get(); Optional<XNode> oXNode = clusterInfo.getNodeForVolume(vertxContext, volumeId); if (oXNode.isPresent()) { XNode xNode = oXNode.get(); return xNode.createWriteStream(volumeId, requiredSpace, messageDigestFactories) .onErrorResumeNext(throwable -> { LOGGER.warn(String.format("Failed to connect to volume %s", volumeId), throwable); return Defer.just(null); }) .doOnNext(nodeWriteStreamBlob -> { if (nodeWriteStreamBlob != null) { ConnectedVolume connectedVolume = new ConnectedVolume(); connectedVolume.setxNode(xNode); connectedVolume.setVolumeId(volumeId); connectedVolume.setNodeId(serviceDef.getId()); connectedVolume.setNodeWriteStreamBlob(nodeWriteStreamBlob); if (allowSameNode || seenNodes.add(connectedVolume.getNodeId())) { results.add(connectedVolume); } } }) .map(nodeWriteStreamBlob -> { if (results.size() >= numberToCollect) { return false; } else { return true; } }); } else { return Defer.just(true); } } } return Defer.just(true); }); } else { return Defer.just(true); } }).map(aBoolean -> results); } return Defer.just(Collections.emptyList()); } public static class ConnectedVolume { private String nodeId; private XNode xNode; private String volumeId; private NodeWriteStreamBlob nodeWriteStreamBlob; public ConnectedVolume(String nodeId, XNode xNode, String volumeId) { this.nodeId = nodeId; this.xNode = xNode; this.volumeId = volumeId; } public ConnectedVolume() { } public NodeWriteStreamBlob getNodeWriteStreamBlob() { return nodeWriteStreamBlob; } public ConnectedVolume setNodeWriteStreamBlob(NodeWriteStreamBlob nodeWriteStreamBlob) { this.nodeWriteStreamBlob = nodeWriteStreamBlob; return this; } public void setNodeId(String nodeId) { this.nodeId = nodeId; } public void setxNode(XNode xNode) { this.xNode = xNode; } public void setVolumeId(String volumeId) { this.volumeId = volumeId; } public String getNodeId() { return nodeId; } public XNode getxNode() { return xNode; } public String getVolumeId() { return volumeId; } } }