/*
 * Copyright 2016 The Simple File Server Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.sfs.filesystem.volume;

import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.io.BaseEncoding;
import com.google.common.math.LongMath;
import io.vertx.core.Context;
import io.vertx.core.Handler;
import io.vertx.core.buffer.Buffer;
import io.vertx.core.logging.Logger;
import io.vertx.core.logging.LoggerFactory;
import io.vertx.core.streams.ReadStream;
import org.sfs.SfsVertx;
import org.sfs.block.RangeLock;
import org.sfs.block.RecyclingAllocator;
import org.sfs.filesystem.BlobFile;
import org.sfs.filesystem.ChecksummedPositional;
import org.sfs.io.BufferEndableWriteStream;
import org.sfs.protobuf.XVolume;
import org.sfs.rx.Defer;
import org.sfs.rx.ObservableFuture;
import org.sfs.rx.RxHelper;
import org.sfs.rx.ToVoid;
import org.sfs.util.ExceptionHelper;
import org.sfs.vo.TransientXAllocatedFile;
import org.sfs.vo.TransientXFileSystem;
import org.sfs.vo.TransientXVolume;
import rx.Observable;
import rx.Subscriber;
import rx.functions.Func1;

import java.io.IOException;
import java.nio.file.FileStore;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.UUID;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;

/**
 * Version 1 volume implementation.
 *
 * <p>A volume lives in a single directory and is made of three files: a {@code meta} file holding
 * the super block (volume id and block sizes), an {@code index} file of fixed size index blocks and
 * a {@code data} file holding the blob bytes the index blocks point at. Free space in the index and
 * data files is tracked in memory by two {@link RecyclingAllocator}s that are rebuilt from the
 * index every time the volume is opened.
 *
 * <p>A periodic garbage collector marks expired index blocks as collected and returns their space
 * to the allocators. It can be paused (see {@link #waitAndPauseGc(SfsVertx)}) while the volume is
 * being copied or closed.
 */
public class VolumeV1 implements Volume {

    /**
     * State of the garbage collector: idle ({@code STOPPED}), currently scanning
     * ({@code EXECUTING}) or prevented from starting ({@code PAUSED}).
     */
    private enum GcState {
        PAUSED,
        EXECUTING,
        STOPPED
    }

    private final Logger logger;
    // This value needs to be set high enough so that when MaintainObjectsForNode
    // makes a copy of an object it's able to ack this copy during its next run interval.
    //
    // The object is not ack'd during the initial copy since, when I wrote the copy code,
    // it was simpler to leave the object copy unacked until the next run interval. Acking the
    // object during the copy would require a subsequent update to the index which is awkward
    // to do since MaintainObjectsForNode updates the index using bulk api
    //
    public static final long MAX_GC_AGE = TimeUnit.DAYS.toMillis(4);
    public static final int INDEX_BLOCK_SIZE = 60;
    public static final int DATA_BLOCK_SIZE = 8;
    public static final int TINY_DATA_THRESHOLD = INDEX_BLOCK_SIZE + DATA_BLOCK_SIZE;
    private static final int INDEX_SCAN_BATCH_SIZE = 1000;
    private static final long ACTIVE_WRITE_STREAM_TIMEOUT = TimeUnit.MINUTES.toMillis(1);
    private static final long LOCK_WAIT_TIMEOUT = TimeUnit.SECONDS.toMillis(30);
    private static final int MAX_FREE_RANGES = 100000;
    private Path metaFilePath;
    private Path dataFilePath;
    private Path indexFilePath;
    private final Logger gcLogger;
    private RecyclingAllocator dataFileAllocator;
    private RecyclingAllocator indexFileAllocator;
    private final Path basePath;
    private String volumeId;
    private final AtomicReference<GcState> gcState = new AtomicReference<>(GcState.STOPPED);
    private final AtomicReference<Status> volumeState = new AtomicReference<>(Status.STOPPED);
    private MetaFile metaFile;
    private IndexFile indexFile;
    private BlobFile blobFile;
    private int indexBlockSize = -1;
    private int dataBlockSize = -1;

    /**
     * Creates a volume rooted at the supplied directory. Nothing is touched on disk until
     * {@link #open(SfsVertx)} is called.
     *
     * @param path directory that holds (or will hold) the meta, index and data files
     */
    public VolumeV1(Path path) {
        this.basePath = path;
        this.metaFilePath = metaFilePath(basePath);
        this.dataFilePath = dataFilePath(basePath);
        this.indexFilePath = indexFilePath(basePath);
        logger = LoggerFactory.getLogger(VolumeV1.class.getName() + "." + join(basePath));
        gcLogger = LoggerFactory.getLogger(VolumeV1.class.getName() + "." + join(basePath) + ".gc");
    }

    /**
     * Joins the name elements of a path with dots so the path can be used as a logger name suffix.
     */
    protected static String join(Path path) {
        return Joiner.on('.').join(path);
    }

    /**
     * @return the normalized path of the {@code meta} file inside {@code basePath}
     */
    protected Path metaFilePath(Path basePath) {
        return Paths.get(basePath.toString(), "meta").normalize();
    }

    /**
     * @return the normalized path of the {@code index} file inside {@code basePath}
     */
    protected Path indexFilePath(Path basePath) {
        return Paths.get(basePath.toString(), "index").normalize();
    }

    /**
     * @return the normalized path of the {@code data} file inside {@code basePath}
     */
    protected Path dataFilePath(Path basePath) {
        return Paths.get(basePath.toString(), "data").normalize();
    }

    @Override
    public String getVolumeId() {
        return volumeId;
    }

    @Override
    public Status status() {
        return volumeState.get();
    }

    /**
     * Collects file, allocator and file system statistics for this volume on the background pool.
     *
     * @return an observable emitting one {@link TransientXVolume}; fails with
     * {@link VolumeStoppedException} if the volume is not started and with a
     * {@link RuntimeException} wrapping any {@link IOException}
     */
    @Override
    public Observable<TransientXVolume> volumeInfo(SfsVertx vertx) {
        Context context = vertx.getOrCreateContext();
        return Defer.aVoid()
                .doOnNext(aVoid -> checkStarted())
                .flatMap(aVoid -> RxHelper.executeBlocking(context, vertx.getBackgroundPool(), () -> {
                    try {
                        FileStore fileStore = Files.getFileStore(basePath);

                        long usableSpace = fileStore.getUsableSpace();
                        long actualUsableSpace;
                        try {
                            actualUsableSpace = LongMath.checkedAdd(
                                    indexFileAllocator.getBytesFree(usableSpace),
                                    dataFileAllocator.getBytesFree(usableSpace));
                        } catch (ArithmeticException e) {
                            // the sum overflowed, fall back to what the file store reports
                            actualUsableSpace = usableSpace;
                        }

                        TransientXAllocatedFile indexFileInfo = new TransientXAllocatedFile()
                                .setFile(indexFilePath.toString())
                                .setFileSizeBytes(Files.size(indexFilePath))
                                .setBytesFree(indexFileAllocator.getBytesFree(usableSpace))
                                .setFreeRangeCount(indexFileAllocator.getNumberOfFreeRanges())
                                .setLockCount(indexFile.getLockCount())
                                .setWriteQueueBytesPending(indexFile.getWriteQueueSize())
                                .setWriteQueueBytesFull(indexFile.getWriteQueueMaxWrites())
                                .setWriteQueueBytesDrained(indexFile.getWriteQueueLowWater());

                        TransientXAllocatedFile dataFileInfo = new TransientXAllocatedFile()
                                .setFile(dataFilePath.toString())
                                .setFileSizeBytes(Files.size(dataFilePath))
                                .setBytesFree(dataFileAllocator.getBytesFree(usableSpace))
                                .setFreeRangeCount(dataFileAllocator.getNumberOfFreeRanges())
                                .setLockCount(blobFile.getLockCount())
                                .setWriteQueueBytesPending(blobFile.getWriteQueueSize())
                                .setWriteQueueBytesFull(blobFile.getWriteQueueMaxWrites())
                                .setWriteQueueBytesDrained(blobFile.getWriteQueueLowWater());

                        TransientXFileSystem fileSystemInfo = new TransientXFileSystem()
                                .setDevice(fileStore.name())
                                .setPath(basePath.toString())
                                .setTotalSpace(fileStore.getTotalSpace())
                                .setUnallocatedSpace(fileStore.getUnallocatedSpace())
                                .setUsableSpace(usableSpace)
                                .setType(fileStore.type())
                                .setPartition(basePath.getRoot().toString());

                        TransientXVolume volumeInfo = new TransientXVolume()
                                .setId(volumeId)
                                .setIndexFile(indexFileInfo)
                                .setDataFile(dataFileInfo)
                                .setFileSystem(fileSystemInfo)
                                .setUsableSpace(actualUsableSpace)
                                .setStatus(volumeState.get());

                        return volumeInfo;
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }));
    }

    /**
     * Copies the super block, every index block and the data each index block points at into a
     * new set of volume files under {@code destinationDirectory}.
     *
     * <p>Garbage collection is paused for the duration of the copy and resumed when the copy
     * terminates, whether it succeeds, fails or is unsubscribed. The pause is only released if
     * this call actually acquired it.
     *
     * @param destinationDirectory directory to create and copy into
     * @return an observable emitting a single {@code null} on success; a
     * {@link RejectedExecutionException} anywhere in the cause chain is reported as
     * {@link VolumeToBusyExecutionException}
     */
    @Override
    public Observable<Void> copy(SfsVertx vertx, Path destinationDirectory) {
        // true only between a successful pause and the matching resume
        AtomicBoolean gcPaused = new AtomicBoolean(false);

        Observable<Void> o = Defer.aVoid()
                .doOnNext(aVoid -> checkStarted())
                .doOnNext(aVoid -> {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Waiting for gc pause for copy of " + basePath + " to " + destinationDirectory);
                    }
                })
                .flatMap(aVoid -> waitAndPauseGc(vertx))
                .doOnNext(aVoid -> {
                    gcPaused.set(true);
                    if (logger.isDebugEnabled()) {
                        logger.debug("Gc paused for copy of " + basePath + " to " + destinationDirectory);
                    }
                })
                .doOnNext(aVoid -> {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Started copy of " + basePath + " to " + destinationDirectory);
                    }
                })
                .flatMap(aVoid -> {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Creating directory " + destinationDirectory);
                    }
                    ObservableFuture<Void> handler = RxHelper.observableFuture();
                    vertx.fileSystem().mkdirs(destinationDirectory.toString(), null, handler.toHandler());
                    return handler
                            .map(aVoid1 -> {
                                if (logger.isDebugEnabled()) {
                                    logger.debug("Created directory " + destinationDirectory);
                                }
                                return (Void) null;
                            });
                })
                .flatMap(aVoid -> {
                    MetaFile dstMetaFile = new MetaFile(metaFilePath(destinationDirectory));
                    IndexFile dstIndexFile = new IndexFile(indexFilePath(destinationDirectory), indexBlockSize);
                    BlobFile dstBlobFile = new BlobFile(dataFilePath(destinationDirectory), dataBlockSize, ACTIVE_WRITE_STREAM_TIMEOUT);

                    return Observable.just((Void) null)
                            .flatMap(aVoid1 -> dstMetaFile.open(vertx, StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
                            .flatMap(aVoid1 -> dstIndexFile.open(vertx, StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
                            .flatMap(aVoid1 -> dstBlobFile.open(vertx, StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
                            .flatMap(aVoid1 -> dstMetaFile.enableWrites(vertx))
                            .flatMap(aVoid1 -> dstIndexFile.enableWrites(vertx))
                            .flatMap(aVoid1 -> dstBlobFile.enableWrites(vertx))
                            .flatMap(aVoid1 ->
                                    getSuperBlock(vertx)
                                            .flatMap(xSuperBlock -> dstMetaFile.set(vertx, xSuperBlock)))
                            .flatMap(aVoid1 ->
                                    scanIndex(vertx, IndexBlockReader.LockType.READ, checksummedPositional -> {
                                        XVolume.XIndexBlock header = checksummedPositional.getValue();
                                        // index block first, then the data it references, both at the same positions
                                        return dstIndexFile.setBlock(vertx, checksummedPositional.getPosition(), checksummedPositional.getValue())
                                                .flatMap(aVoid2 -> blobFile.copy(vertx, header.getDataPosition(), header.getDataLength(), dstBlobFile, header.getDataPosition(), header.getDataLength()))
                                                .singleOrDefault(null);
                                    }))
                            .flatMap(aVoid1 -> dstMetaFile.disableWrites(vertx))
                            .flatMap(aVoid1 -> dstIndexFile.disableWrites(vertx))
                            .flatMap(aVoid1 -> dstBlobFile.disableWrites(vertx))
                            .flatMap(aVoid1 -> dstMetaFile.force(vertx, true))
                            .flatMap(aVoid1 -> dstIndexFile.force(vertx, true))
                            .flatMap(aVoid1 -> dstBlobFile.force(vertx, true))
                            .flatMap(aVoid1 -> dstMetaFile.close(vertx))
                            .flatMap(aVoid1 -> dstIndexFile.close(vertx))
                            .flatMap(aVoid1 -> dstBlobFile.close(vertx));
                })
                .doOnNext(aVoid -> {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Finished copy of " + basePath + " to " + destinationDirectory);
                    }
                });

        return Observable.using(
                () -> null,
                aVoid -> o.single(),
                aVoid -> {
                    // Runs on completion, error and unsubscribe. Resume exactly once and only if
                    // this copy really paused the collector; otherwise a successful copy would
                    // leave the collector paused forever and an early failure would try to resume
                    // a collector that was never paused.
                    // NOTE(review): the pause is not reference counted, overlapping copies of the
                    // same volume share one PAUSED state and the second resume will fail its check.
                    if (gcPaused.compareAndSet(true, false)) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("Resume gc for copy of " + basePath + " to " + destinationDirectory);
                        }
                        resumeGc();
                    }
                },
                true)
                .onErrorResumeNext(throwable -> mapRejectedExecution(throwable));
    }

    /**
     * Reads the super block from the meta file while holding the meta file read lock.
     */
    protected Observable<XVolume.XSuperBlock> getSuperBlock(SfsVertx vertx) {
        return RangeLock.lockedObservable(vertx,
                () -> metaFile.tryReadLock(),
                () -> metaFile.getBlock(vertx).map(Optional::get),
                LOCK_WAIT_TIMEOUT);
    }

    /**
     * Writes the super block to the meta file while holding the meta file write lock and then
     * forces the meta file.
     */
    protected Observable<Void> setSuperBlock(SfsVertx vertx, XVolume.XSuperBlock xSuperBlock) {
        return RangeLock.lockedObservable(vertx,
                () -> metaFile.tryWriteLock(),
                () -> metaFile.set(vertx, xSuperBlock),
                LOCK_WAIT_TIMEOUT)
                .flatMap(aVoid -> metaFile.force(vertx, false));
    }

    /**
     * Starts the volume: creates the directory, reads or creates the super block, opens the index
     * and data files, rebuilds both allocators from the index and schedules garbage collection.
     *
     * @return an observable emitting a single {@code null} once the volume is {@code STARTED};
     * fails with {@link IllegalStateException} if the volume is not {@code STOPPED} or the super
     * block is corrupt or uses unexpected block sizes
     */
    @Override
    public Observable<Void> open(SfsVertx vertx) {
        Context context = vertx.getOrCreateContext();
        return Defer.aVoid()
                .doOnNext(aVoid -> {
                    Preconditions.checkState(volumeState.compareAndSet(Status.STOPPED, Status.STARTING));
                    // close() leaves the collector paused; clear that so a reopened volume collects again
                    gcState.set(GcState.STOPPED);
                })
                .doOnNext(aVoid -> logger.info("Starting volume " + basePath.toString()))
                .flatMap(aVoid -> RxHelper.executeBlocking(context, vertx.getBackgroundPool(), () -> {
                    try {
                        Files.createDirectories(basePath);

                        metaFilePath = metaFilePath(basePath).normalize();
                        dataFilePath = dataFilePath(basePath).normalize();
                        indexFilePath = indexFilePath(basePath).normalize();

                        return (Void) null;
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }))
                .doOnNext(aVoid -> logger.info("Starting Metadata Initialization"))
                .doOnNext(aVoid -> metaFile = new MetaFile(metaFilePath))
                .flatMap(aVoid -> metaFile.open(vertx, StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
                .flatMap(aVoid -> metaFile.size(vertx)
                        .flatMap(size -> {
                            if (size <= 0) {
                                // brand new volume: generate an id and persist the super block
                                volumeId = UUID.randomUUID().toString();
                                dataBlockSize = DATA_BLOCK_SIZE;
                                indexBlockSize = INDEX_BLOCK_SIZE;

                                XVolume.XSuperBlock xSuperBlock = XVolume.XSuperBlock.newBuilder()
                                        .setVolumeId(volumeId)
                                        .setDataBlockSize(dataBlockSize)
                                        .setIndexBlockSize(indexBlockSize)
                                        .build();

                                return metaFile.enableWrites(vertx)
                                        .flatMap(aVoid1 -> setSuperBlock(vertx, xSuperBlock));
                            } else {
                                return metaFile.getBlock(vertx)
                                        .map(Optional::get)
                                        .map(superBlock -> {
                                            Preconditions.checkState(superBlock.getType() != null, "Corrupt superblock");
                                            Preconditions.checkState(superBlock.getVolumeId() != null, "Corrupt superblock");
                                            Preconditions.checkState(superBlock.getDataBlockSize() > 0, "Corrupt superblock");
                                            Preconditions.checkState(superBlock.getIndexBlockSize() > 0, "Corrupt superblock");

                                            volumeId = superBlock.getVolumeId();
                                            indexBlockSize = superBlock.getIndexBlockSize();
                                            dataBlockSize = superBlock.getDataBlockSize();

                                            // TODO: add functionality to upgrade the volume data structures
                                            Preconditions.checkState(indexBlockSize == INDEX_BLOCK_SIZE, "Index block size %s does not match the expected block size for this volume %s", indexBlockSize, INDEX_BLOCK_SIZE);
                                            Preconditions.checkState(dataBlockSize == DATA_BLOCK_SIZE, "Data block size %s does not match the expected block size for this volume %s", dataBlockSize, DATA_BLOCK_SIZE);

                                            return (Void) null;
                                        })
                                        .flatMap(aVoid1 -> metaFile.enableWrites(vertx));
                            }
                        }))
                .doOnNext(aVoid -> logger.info("Finished Metadata Initialization"))
                .doOnNext(aVoid -> {
                    dataFileAllocator = new RecyclingAllocator(dataBlockSize);
                    indexFileAllocator = new RecyclingAllocator(indexBlockSize);
                    indexFile = new IndexFile(indexFilePath, indexBlockSize);
                    blobFile = new BlobFile(dataFilePath, dataBlockSize, ACTIVE_WRITE_STREAM_TIMEOUT);
                })
                .flatMap(aVoid -> indexFile.open(vertx, StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
                .flatMap(aVoid -> blobFile.open(vertx, StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
                .doOnNext(aVoid -> logger.info("Started Allocator Initialization"))
                .flatMap(aVoid ->
                        scanIndex(vertx, IndexBlockReader.LockType.NONE, checksummedPositional -> {
                            XVolume.XIndexBlock header = checksummedPositional.getValue();
                            // Allocate all records that haven't been marked as garbage collected
                            if (!header.getGarbageCollected()) {
                                final long headerPosition = checksummedPositional.getPosition();
                                long dataPosition = header.getDataPosition();
                                long dataLength = header.getDataLength();
                                long allocatedIndexPosition = indexFileAllocator.alloc(headerPosition, indexBlockSize);
                                long allocatedDataPosition = dataFileAllocator.alloc(dataPosition, dataLength);
                                // The outer comparisons keep the allocator toString() calls, which are
                                // only needed for the failure message, off the per block happy path.
                                if (allocatedDataPosition != dataPosition) {
                                    Preconditions.checkState(allocatedDataPosition == dataPosition, "Data position was %s, expected %s, block allocator was %s", allocatedDataPosition, dataPosition, dataFileAllocator.toString());
                                }
                                if (allocatedIndexPosition != headerPosition) {
                                    Preconditions.checkState(allocatedIndexPosition == headerPosition, "Header position was %s, expected %s, block allocator was %s", allocatedIndexPosition, headerPosition, indexFileAllocator.toString());
                                }
                            }
                            return Observable.just(null);
                        }))
                .doOnNext(aVoid -> logger.info("Finished Allocator Initialization"))
                .flatMap(aVoid -> indexFile.enableWrites(vertx))
                .flatMap(aVoid -> blobFile.enableWrites(vertx))
                .doOnNext(aVoid -> logger.info("Starting Garbage Collector Initialization"))
                .map(aVoid -> {
                    final long interval = TimeUnit.MINUTES.toMillis(1);
                    scheduleGarbageCollection(vertx, interval);
                    return (Void) null;
                })
                .doOnNext(aVoid -> logger.info("Finished Garbage Collector Initialization"))
                .doOnNext(aVoid -> Preconditions.checkState(volumeState.compareAndSet(Status.STARTING, Status.STARTED)))
                .doOnNext(aVoid -> logger.info("Started volume " + basePath.toString()));
    }

    /**
     * Arms a one shot timer that runs a garbage collection pass and then re-arms itself, whether
     * the pass completed or failed. The chain ends once the volume is stopping or stopped so a
     * closed volume does not keep a timer firing forever.
     */
    private void scheduleGarbageCollection(SfsVertx vertx, long interval) {
        vertx.setTimer(interval, event -> {
            Status status = volumeState.get();
            if (Status.STOPPING.equals(status) || Status.STOPPED.equals(status)) {
                // volume was closed, open() arms a fresh timer if it is started again
                return;
            }
            garbageCollection(vertx)
                    .count()
                    .map(new ToVoid<>())
                    .singleOrDefault(null)
                    .subscribe(new Subscriber<Void>() {
                        @Override
                        public void onCompleted() {
                            scheduleGarbageCollection(vertx, interval);
                        }

                        @Override
                        public void onError(Throwable e) {
                            scheduleGarbageCollection(vertx, interval);
                        }

                        @Override
                        public void onNext(Void aVoid) {
                            // do nothing
                        }
                    });
        });
    }

    /**
     * Stops the volume: pauses garbage collection, then disables writes on, forces and closes the
     * meta, index and data files. A failure in any one step is logged and the remaining steps
     * still run.
     *
     * @return an observable emitting a single {@code null} once the volume is {@code STOPPED}
     */
    @Override
    public Observable<Void> close(SfsVertx vertx) {
        return Defer.aVoid()
                .doOnNext(aVoid -> {
                    logger.info("Stopping volume " + basePath.toString());
                    Preconditions.checkState(volumeState.compareAndSet(Status.STARTED, Status.STOPPING));
                })
                .flatMap(aVoid -> waitAndPauseGc(vertx))
                .onErrorResumeNext(throwable -> logAndContinue(throwable))
                .flatMap(aVoid -> {
                    if (metaFile != null) {
                        return metaFile.disableWrites(vertx)
                                .flatMap(aVoid1 -> metaFile.force(vertx, true))
                                .flatMap(aVoid1 -> metaFile.close(vertx));
                    }
                    return Observable.just(null);
                })
                .onErrorResumeNext(throwable -> logAndContinue(throwable))
                .flatMap(aVoid -> {
                    if (indexFile != null) {
                        return indexFile.disableWrites(vertx)
                                .flatMap(aVoid1 -> indexFile.force(vertx, true))
                                .flatMap(aVoid1 -> indexFile.close(vertx));
                    }
                    return Observable.just(null);
                })
                .onErrorResumeNext(throwable -> logAndContinue(throwable))
                .flatMap(aVoid -> {
                    if (blobFile != null) {
                        return blobFile.disableWrites(vertx)
                                .flatMap(aVoid1 -> blobFile.force(vertx, true))
                                .flatMap(aVoid1 -> blobFile.close(vertx));
                    }
                    return Observable.just(null);
                })
                .onErrorResumeNext(throwable -> logAndContinue(throwable))
                .doOnNext(aVoid -> Preconditions.checkState(volumeState.compareAndSet(Status.STOPPING, Status.STOPPED)))
                .doOnNext(aVoid -> logger.info("Stopped volume " + basePath.toString()));
    }

    /**
     * Logs a failure raised while closing and lets the close sequence carry on with a single
     * {@code null} item.
     */
    private Observable<Void> logAndContinue(Throwable throwable) {
        logger.error("Handling error", throwable);
        return Observable.just((Void) null);
    }

    /**
     * Reports a {@link RejectedExecutionException} found anywhere in the cause chain as
     * {@link VolumeToBusyExecutionException}; every other failure is passed through unchanged.
     */
    private static <T> Observable<T> mapRejectedExecution(Throwable throwable) {
        Optional<RejectedExecutionException> oException = ExceptionHelper.unwrapCause(RejectedExecutionException.class, throwable);
        if (oException.isPresent()) {
            return Observable.error(new VolumeToBusyExecutionException(oException.get()));
        } else {
            return Observable.error(throwable);
        }
    }

    /**
     * Looks up the index block at {@code position} and, if it exists and is neither deleted nor
     * garbage collected, returns a blob that can stream the requested slice of its data.
     *
     * @param position position of the index block
     * @param oOffset  offset into the blob data; absent, zero or negative means the start
     * @param oLength  maximum number of bytes to produce; absent or negative means everything from
     *                 the offset to the end of the blob. The length is always capped at the number
     *                 of bytes remaining after the offset so a read never runs past the blob.
     * @return the blob, or absent if there is no live index block at {@code position}; fails with
     * {@link IllegalStateException} if the offset lies beyond the end of the blob
     */
    @Override
    public Observable<Optional<ReadStreamBlob>> getDataStream(SfsVertx vertx, final long position, final Optional<Long> oOffset, final Optional<Long> oLength) {
        return Defer.aVoid()
                .doOnNext(aVoid -> checkStarted())
                .flatMap(aVoid -> RangeLock.lockedObservable(vertx,
                        () -> indexFile.tryReadLock(position, indexBlockSize),
                        () -> getIndexBlock0(vertx, position),
                        LOCK_WAIT_TIMEOUT))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .filter(positional -> !positional.getValue().getGarbageCollected())
                .filter(positional -> !positional.getValue().getDeleted())
                .flatMap(positional -> {
                    XVolume.XIndexBlock header = positional.getValue();
                    long dataPosition = header.getDataPosition();
                    long dataLength = header.getDataLength();

                    final long requestedOffset = oOffset.isPresent() ? oOffset.get() : 0;
                    final long startPosition = requestedOffset <= 0
                            ? dataPosition
                            : LongMath.checkedAdd(dataPosition, requestedOffset);

                    long endPosition = LongMath.checkedAdd(dataPosition, dataLength);
                    Preconditions.checkState(startPosition <= endPosition, "Offset must be <= %s", endPosition - dataPosition);

                    // Cap at what is left after the offset, not at the full blob length; otherwise
                    // a read with a positive offset would spill into whatever follows the blob.
                    final long remaining = endPosition - startPosition;
                    final long requestedLength = oLength.isPresent() ? oLength.get() : -1;
                    final long normalizedLength = requestedLength <= -1
                            ? remaining
                            : Math.min(remaining, requestedLength);

                    ReadStreamBlob readStreamBlob = new ReadStreamBlob(volumeId, position, 0, normalizedLength) {
                        @Override
                        public Observable<Void> produce(BufferEndableWriteStream endableWriteStream) {
                            return blobFile.produce(vertx, startPosition, normalizedLength, endableWriteStream)
                                    .onErrorResumeNext(throwable -> mapRejectedExecution(throwable));
                        }
                    };
                    return Observable.just(Optional.of(readStreamBlob));
                })
                .singleOrDefault(Optional.absent())
                .onErrorResumeNext(throwable -> mapRejectedExecution(throwable));
    }

    /**
     * Allocates an index block and {@code length} bytes of data space, writes an unacknowledged
     * index block for them and returns a blob that can consume the data.
     *
     * <p>If writing the index block fails the allocated header and data ranges are released
     * before the failure is propagated.
     *
     * @param length number of data bytes that will be written, must be {@code >= 0}
     */
    @Override
    public Observable<WriteStreamBlob> putDataStream(SfsVertx vertx, final long length) {
        return Defer.aVoid()
                .doOnNext(aVoid -> {
                    checkStarted();
                    Preconditions.checkArgument(length >= 0, "Length must be >= 0");
                })
                .flatMap(aVoid -> allocate(length))
                .flatMap(allocatedPosition -> {
                    final long headerPosition = allocatedPosition.getHeaderPosition();
                    return RangeLock.lockedObservable(
                            vertx,
                            () -> indexFile.tryWriteLock(headerPosition, indexBlockSize),
                            () -> Observable.defer(() -> {
                                final long dataPosition = allocatedPosition.getDataPosition();

                                XVolume.XIndexBlock xHeader = XVolume.XIndexBlock.newBuilder()
                                        .setDataLength(length)
                                        .setAcknowledged(false)
                                        .setUpdatedTs(System.currentTimeMillis())
                                        .setDeleted(false)
                                        .setGarbageCollected(false)
                                        .setDataPosition(dataPosition)
                                        .build();

                                return setIndexBlock0(vertx, headerPosition, xHeader)
                                        .onErrorResumeNext(throwable -> {
                                            // give the space back, then rethrow the original failure
                                            return deallocateHeaderAndData(headerPosition, dataPosition, length)
                                                    .map(aVoid1 -> {
                                                        if (throwable instanceof RuntimeException) {
                                                            throw (RuntimeException) throwable;
                                                        } else {
                                                            throw new RuntimeException(throwable);
                                                        }
                                                    });
                                        })
                                        .map(aVoid -> {
                                            WriteStreamBlob writeStreamBlob = new WriteStreamBlob(volumeId, headerPosition, length) {
                                                @Override
                                                public Observable<Void> consume(ReadStream<Buffer> src) {
                                                    return Defer.aVoid()
                                                            .flatMap(aVoid1 -> blobFile.consume(vertx, dataPosition, length, src))
                                                            .flatMap(aVoid1 -> blobFile.force(vertx, false))
                                                            .onErrorResumeNext(throwable -> mapRejectedExecution(throwable));
                                                }
                                            };
                                            return writeStreamBlob;
                                        });
                            }),
                            LOCK_WAIT_TIMEOUT)
                            .flatMap(writeStreamBlob ->
                                    indexFile.force(vertx, false)
                                            .map(aVoid1 -> writeStreamBlob));
                })
                .onErrorResumeNext(throwable -> mapRejectedExecution(throwable));
    }

    /**
     * Marks the index block at {@code position} as acknowledged, refreshes its timestamp and
     * forces the index file.
     *
     * @return the header of the acknowledged blob, or absent if there is no index block at
     * {@code position} or it is deleted or garbage collected
     */
    @Override
    public Observable<Optional<HeaderBlob>> acknowledge(SfsVertx vertx, final long position) {
        return Defer.aVoid()
                .doOnNext(aVoid -> checkStarted())
                .flatMap(aVoid ->
                        RangeLock.lockedObservable(
                                vertx,
                                () -> indexFile.tryWriteLock(position, indexBlockSize),
                                () -> Defer.aVoid()
                                        .flatMap(aVoid2 -> getIndexBlock0(vertx, position))
                                        .filter(Optional::isPresent)
                                        .map(Optional::get)
                                        .filter(positional -> !positional.getValue().getGarbageCollected())
                                        .filter(positional -> !positional.getValue().getDeleted())
                                        .flatMap(positional -> {
                                            XVolume.XIndexBlock header = positional.getValue();
                                            XVolume.XIndexBlock updated = header.toBuilder()
                                                    .setAcknowledged(true)
                                                    .setDeleted(false)
                                                    .setUpdatedTs(System.currentTimeMillis())
                                                    .build();
                                            return setIndexBlock0(vertx, positional.getPosition(), updated)
                                                    .map(aVoid1 -> Optional.of(new HeaderBlob(volumeId, position, header.getDataLength())));
                                        })
                                        .singleOrDefault(Optional.absent()),
                                LOCK_WAIT_TIMEOUT
                        ).flatMap(headerBlobOptional ->
                                indexFile.force(vertx, false)
                                        .map(aVoid1 -> headerBlobOptional))
                )
                .onErrorResumeNext(throwable -> mapRejectedExecution(throwable));
    }

    /**
     * Marks the index block at {@code position} as deleted and forces the index file. An index
     * block that is already deleted is returned as is without being rewritten.
     *
     * @return the header of the deleted blob, or absent if there is no index block at
     * {@code position} or it has been garbage collected
     */
    @Override
    public Observable<Optional<HeaderBlob>> delete(SfsVertx vertx, final long position) {
        return Defer.aVoid()
                .doOnNext(aVoid -> checkStarted())
                .flatMap(aVoid ->
                        RangeLock.lockedObservable(vertx,
                                () -> indexFile.tryWriteLock(position, indexBlockSize),
                                () -> getIndexBlock0(vertx, position)
                                        .filter(Optional::isPresent)
                                        .map(Optional::get)
                                        .filter(positional -> !positional.getValue().getGarbageCollected())
                                        .flatMap(positional -> {
                                            XVolume.XIndexBlock header = positional.getValue();
                                            if (header.getDeleted()) {
                                                return Observable.just(Optional.of(new HeaderBlob(volumeId, position, header.getDataLength())));
                                            } else {
                                                XVolume.XIndexBlock updated = header.toBuilder()
                                                        .setDeleted(true)
                                                        .setUpdatedTs(System.currentTimeMillis())
                                                        .build();
                                                return setIndexBlock0(vertx, positional.getPosition(), updated)
                                                        .map(aVoid1 -> Optional.of(new HeaderBlob(volumeId, position, header.getDataLength())));
                                            }
                                        })
                                        .singleOrDefault(Optional.absent()),
                                LOCK_WAIT_TIMEOUT)
                                .flatMap(headerBlobOptional ->
                                        indexFile.force(vertx, false)
                                                .map(aVoid1 -> headerBlobOptional))
                )
                .onErrorResumeNext(throwable -> mapRejectedExecution(throwable));
    }

    /**
     * Reads the index block at {@code position}. No lock is taken here, callers hold it.
     */
    protected Observable<Optional<ChecksummedPositional<XVolume.XIndexBlock>>> getIndexBlock0(SfsVertx vertx, final long position) {
        return indexFile.getBlock(vertx, position);
    }

    /**
     * Writes the index block at {@code position}. No lock is taken here, callers hold it.
     */
    protected Observable<Void> setIndexBlock0(SfsVertx vertx, final long position, final XVolume.XIndexBlock header) {
        return indexFile.setBlock(vertx, position, header);
    }

    /**
     * @throws IllegalStateException if {@code value} is not a multiple of {@code blockSize}
     */
    protected void checkAligned(long value, int blockSize) {
        Preconditions.checkState(value % blockSize == 0, "%s is not multiple of %s", value, blockSize);
    }

    /**
     * @throws VolumeStoppedException if the volume is in any state other than {@code STARTED}
     */
    private void checkStarted() {
        if (!Status.STARTED.equals(volumeState.get())) {
            throw new VolumeStoppedException();
        }
    }

    /**
     * Runs one garbage collection pass unless the collector is paused or already executing, in
     * which case a single {@code null} is emitted and nothing is scanned.
     *
     * <p>A pass scans the whole index. An index block is recycled when it has a timestamp, is at
     * least {@link #MAX_GC_AGE} old, has not been collected yet and is either deleted or was never
     * acknowledged, provided neither allocator has reached {@code MAX_FREE_RANGES} free ranges.
     * Recycling re-reads the block under a write lock and only proceeds if its checksum is
     * unchanged since the scan.
     */
    protected Observable<Void> garbageCollection(SfsVertx vertx) {
        return Defer.aVoid()
                .doOnNext(aVoid -> checkStarted())
                .flatMap(aVoid -> {
                    // set once the pass itself has moved the state back to STOPPED
                    AtomicBoolean stopped = new AtomicBoolean(false);
                    return Observable.using(
                            () -> gcState.compareAndSet(GcState.STOPPED, GcState.EXECUTING),
                            locked -> {
                                if (Boolean.TRUE.equals(locked)) {
                                    if (logger.isDebugEnabled()) {
                                        logger.debug("Started Garbage Collection " + basePath.toString());
                                    }
                                    return scanIndex(vertx, IndexBlockReader.LockType.READ, checksummedPositional -> {
                                        // aborts the scan as soon as the volume leaves STARTED
                                        checkStarted();
                                        XVolume.XIndexBlock xHeader = checksummedPositional.getValue();
                                        boolean shouldDeallocate = false;
                                        if (xHeader.getUpdatedTs() > 0) {
                                            long age = System.currentTimeMillis() - xHeader.getUpdatedTs();
                                            if (age >= MAX_GC_AGE
                                                    && !xHeader.getGarbageCollected()
                                                    && (xHeader.getDeleted() || !xHeader.getAcknowledged())) {
                                                shouldDeallocate = true;
                                            }
                                        }
                                        if (shouldDeallocate
                                                && indexFileAllocator.getNumberOfFreeRanges() < MAX_FREE_RANGES
                                                && dataFileAllocator.getNumberOfFreeRanges() < MAX_FREE_RANGES) {
                                            final long headerPosition = checksummedPositional.getPosition();
                                            long dataPosition = xHeader.getDataPosition();
                                            long dataLength = xHeader.getDataLength();
                                            if (gcLogger.isDebugEnabled()) {
                                                gcLogger.debug(String.format("GC Recycle Blocks {%s/%d/%d %d/%d} = {%s}", BaseEncoding.base64().encode(checksummedPositional.getActualChecksum()), headerPosition, indexBlockSize, dataPosition, dataLength, xHeader));
                                            }
                                            return RangeLock.lockedObservable(vertx,
                                                    () -> indexFile.tryWriteLock(headerPosition, indexBlockSize),
                                                    () -> Defer.aVoid()
                                                            .flatMap(aVoid1 -> getIndexBlock0(vertx, headerPosition))
                                                            .map(Optional::get)
                                                            // skip the block if it changed between the scan and taking the write lock
                                                            .filter(optimisticLockChecksummedPositional -> Arrays.equals(checksummedPositional.getActualChecksum(), optimisticLockChecksummedPositional.getActualChecksum()))
                                                            .flatMap(optimisticLockChecksummedPositional -> {
                                                                XVolume.XIndexBlock updated = xHeader.toBuilder()
                                                                        .setUpdatedTs(System.currentTimeMillis())
                                                                        .setGarbageCollected(true)
                                                                        .setDeleted(true)
                                                                        .build();
                                                                return setIndexBlock0(vertx, headerPosition, updated);
                                                            })
                                                            .flatMap(aVoid1 -> deallocateHeaderAndData(headerPosition, dataPosition, dataLength))
                                                            .singleOrDefault(null),
                                                    LOCK_WAIT_TIMEOUT);
                                        } else {
                                            return Observable.just(null);
                                        }
                                    }).flatMap(aVoid1 ->
                                            indexFile.force(vertx, false)
                                    ).doOnNext(aVoid1 -> {
                                        if (logger.isDebugEnabled()) {
                                            logger.debug("Finished Garbage Collection " + basePath.toString());
                                        }
                                        stopped.compareAndSet(false, true);
                                        Preconditions.checkState(gcState.compareAndSet(GcState.EXECUTING, GcState.STOPPED), "Concurrent modification");
                                    });
                                } else {
                                    return Observable.just(null);
                                }
                            },
                            locked -> {
                                // failure or unsubscribe path: release the EXECUTING state if the pass did not
                                if (locked && !stopped.get()) {
                                    Preconditions.checkState(gcState.compareAndSet(GcState.EXECUTING, GcState.STOPPED), "Concurrent modification");
                                }
                            },
                            true
                    );
                });
    }

    /**
     * Reserves the next available index block and the next available {@code dataLength} bytes of
     * data space.
     */
    protected Observable<AllocatedPosition> allocate(final long dataLength) {
        return Observable.defer(() -> {
            long indexPosition = indexFileAllocator.allocNextAvailable(indexBlockSize);
            long dataPosition = dataFileAllocator.allocNextAvailable(dataLength);
            AllocatedPosition allocatedPosition =
                    new AllocatedPosition()
                            .setHeaderPosition(indexPosition)
                            .setDataPosition(dataPosition);
            return Observable.just(allocatedPosition);
        });
    }

    /**
     * Returns an index block and its data range to the allocators. Both positions must be aligned
     * to their block size, otherwise the observable fails with {@link IllegalStateException}.
     */
    protected Observable<Void> deallocateHeaderAndData(final long headerPosition, final long dataPosition, final long dataLength) {
        return Observable.defer(() -> {
            checkAligned(dataPosition, dataBlockSize);
            checkAligned(headerPosition, indexBlockSize);
            dataFileAllocator.free(dataPosition, dataLength);
            indexFileAllocator.free(headerPosition, indexBlockSize);
            return Observable.just(null);
        });
    }

    /**
     * Scans this volume's index file with a new {@link IndexScanner} and hands every block to
     * {@code transformer}.
     */
    protected Observable<Void> scanIndex(SfsVertx vertx, IndexBlockReader.LockType lockType, Func1<ChecksummedPositional<XVolume.XIndexBlock>, Observable<Void>> transformer) {
        IndexScanner indexScanner = new IndexScanner(indexFile, INDEX_SCAN_BATCH_SIZE, LOCK_WAIT_TIMEOUT);
        return indexScanner.scanIndex(vertx, lockType, transformer);
    }

    /**
     * Pair of positions handed out by {@link #allocate(long)}: where the index block goes and
     * where its data goes.
     */
    protected static class AllocatedPosition {
        private long headerPosition;
        private long dataPosition;

        public AllocatedPosition() {
        }

        public AllocatedPosition setDataPosition(long dataPosition) {
            this.dataPosition = dataPosition;
            return this;
        }

        public AllocatedPosition setHeaderPosition(long headerPosition) {
            this.headerPosition = headerPosition;
            return this;
        }

        public long getHeaderPosition() {
            return headerPosition;
        }

        public long getDataPosition() {
            return dataPosition;
        }
    }

    /**
     * Releases a pause taken with {@link #waitAndPauseGc(SfsVertx)}.
     *
     * @throws IllegalStateException if the collector is not currently paused
     */
    protected void resumeGc() {
        Preconditions.checkState(gcState.compareAndSet(GcState.PAUSED, GcState.STOPPED), "Concurrent Gc State Toggle");
    }

    /**
     * Pauses garbage collection, waiting for a pass that is currently executing to finish first.
     *
     * @return an observable that emits a single {@code null} once the collector is paused
     */
    protected Observable<Void> waitAndPauseGc(SfsVertx vertx) {
        return Observable.defer(() -> {
            ObservableFuture<Void> observableHandler = RxHelper.observableFuture();
            waitAndPauseGc0(vertx, observableHandler);
            return observableHandler;
        });
    }

    /**
     * Completes {@code handler} once the collector is paused, polling every 10ms while a pass is
     * executing.
     *
     * <p>The collector is only moved to {@code PAUSED} from {@code STOPPED}. Flipping an executing
     * pass straight to {@code PAUSED} would neither stop that pass nor wait for it, and the pass
     * would then fail its own {@code EXECUTING -> STOPPED} transition when it ends. An already
     * paused collector is accepted as is.
     */
    protected void waitAndPauseGc0(SfsVertx vertx, ObservableFuture<Void> handler) {
        if (gcState.compareAndSet(GcState.STOPPED, GcState.PAUSED)
                || GcState.PAUSED.equals(gcState.get())) {
            handler.complete(null);
        } else {
            vertx.setTimer(10, event -> waitAndPauseGc0(vertx, handler));
        }
    }
}