package io.eguan.vvr.repository.core.api; /* * #%L * Project eguan * %% * Copyright (C) 2012 - 2017 Oodrive * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import static io.eguan.utils.ByteBuffers.ALLOCATE_DIRECT; import io.eguan.hash.ByteBufferDigest; import io.eguan.hash.HashAlgorithm; import io.eguan.ibs.IbsException; import io.eguan.nrs.NrsFile; import io.eguan.proto.vvr.VvrRemote; import io.eguan.utils.ByteBufferCache; import io.eguan.vvr.repository.core.api.Device.ReadWriteHandle; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Objects; import java.util.concurrent.Callable; import java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; import javax.annotation.concurrent.GuardedBy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Reference implementation of a {@link ReadWriteHandle}. Utility class for the {@link Device} implementations. * * @author oodrive * @author llambert * @author ebredzinski * @author jmcaba */ public abstract class DeviceReadWriteHandleImpl implements ReadWriteHandle { /** Operation performed by the IoTask */ enum IoTaskOpe { /** Read */ READ, /** Write */ WRITE, /** Trim */ TRIM; } /** * Elementary read, write or trim operation. * */ private abstract class IoTask implements Callable<Void> { /** Data source or destination */ protected final ByteBuffer data; /** Offset in data */ protected final int dataOffset; /** Operation to perform */ protected final IoTaskOpe ope; /** Index of the block in the NRS file */ protected final long blockIndex; /** <code>true</code> when the IO request contains a single task */ protected final boolean singleTask; /** Result of the lookup of the key for the current block. Needed to revert a write operation */ protected BlockKeyLookupEx blockKeyLookupEx; /** block transaction id */ protected int txId; /** Optional builder for the peer notification of block changes */ @GuardedBy(value = "blockOpBuilder") protected VvrRemote.RemoteOperation.Builder blockOpBuilder; IoTask(final ByteBuffer data, final int dataOffset, final IoTaskOpe ope, final long blockIndex, final boolean singleTask) { super(); this.data = data == null ? null : data.duplicate(); this.dataOffset = dataOffset; this.ope = ope; this.blockIndex = blockIndex; this.singleTask = singleTask; } final void setTxId(final int txId) { this.txId = txId; } final void setBlockOpBuilder(final VvrRemote.RemoteOperation.Builder blockOpBuilder) { this.blockOpBuilder = blockOpBuilder; } /** * Write the given to data. The position and the limit of the buffer must be set in the source. * * @param source * data to read. */ final void writeToData(final ByteBuffer source) { data.position(dataOffset); data.limit(dataOffset + source.remaining()); data.put(source); } /** * Fill the destination buffer with the contents of data. * * @param dest */ final void readFromData(final ByteBuffer dest) { data.position(dataOffset); data.limit(dataOffset + dest.remaining()); dest.put(data); } /** * Revert the Nrs changes performed by the IO task. Does nothing if <code>blockKeyLookupEx</code> is * <code>null</code>. * * @throws IOException */ final void revertNrs() throws IOException { if (blockKeyLookupEx == null) { // Nothing to revert return; } restoreKey(blockIndex, blockKeyLookupEx); } } /** * Read, write or trim a whole block. * * */ private final class FullIoTask extends IoTask { FullIoTask(final ByteBuffer data, final int dataOffset, final IoTaskOpe ope, final long blockIndex, final boolean singleTask) { super(data, dataOffset, ope, blockIndex, singleTask); } @Override public final Void call() throws Exception { // Look for the block, among parents for the read final byte[] oldKey; if (ope == IoTaskOpe.WRITE && singleTask && !canReplaceOldKey()) { // Single write and no replace: replace does not replace, so no need to look for the previous block oldKey = null; } else { // Must save the old key in case of revert, no need for a recursive search for write blockKeyLookupEx = lookupBlockKeyEx(blockIndex, ope == IoTaskOpe.READ); if (blockKeyLookupEx == null) { // Not found, still will have to reset key on revert blockKeyLookupEx = BlockKeyLookupEx.NOT_FOUND; oldKey = null; } else { oldKey = blockKeyLookupEx.getKey(); assert ope == IoTaskOpe.READ || blockKeyLookupEx.isSourceCurrent(); } } if (ope == IoTaskOpe.READ) { if (oldKey == null || oldKey == NrsFile.HASH_TRIMMED) { // Fill with 0 final ByteBuffer source = blockZero.duplicate(); assert source.position() == 0; assert source.limit() == blockSize; assert source.capacity() == blockSize; writeToData(source); } else { // Load block // The native code can not make concurrent accesses to a HeapByteBuffer (native access to // underlying byte array) if (singleTask || data.isDirect()) { // Can safely write into data fillBlock(blockIndex, oldKey, data, dataOffset, blockKeyLookupEx); } else { final ByteBuffer source = getBlock(blockIndex, oldKey, blockKeyLookupEx, true); try { source.rewind(); writeToData(source); } finally { releaseBlock(source); } } } } else if (ope == IoTaskOpe.WRITE) { // Write: store the new block storeBlock(data, dataOffset, blockIndex, oldKey, txId, blockOpBuilder); } else if (ope == IoTaskOpe.TRIM) { // Trim the block trimBlock(blockIndex); } else { throw new AssertionError("ope=" + ope); } return null; } } /** * Read, write or trim a part of a block. * */ private final class PartialIoTask extends IoTask { final private int ioBlkOffset; final private int ioBlkLength; PartialIoTask(final ByteBuffer data, final int dataOffset, final IoTaskOpe ope, final long blockIndex, final boolean singleTask, final int ioBlkOffset, final int ioBlkLength) { super(data, dataOffset, ope, blockIndex, singleTask); this.ioBlkOffset = ioBlkOffset; this.ioBlkLength = ioBlkLength; } @Override public final Void call() throws Exception { // Can not trim part of a block if (ope == IoTaskOpe.TRIM) { // Nothing to do return null; } // Read or write operation final boolean read = ope == IoTaskOpe.READ; // Look for the block. Need the previous block for write final byte[] oldKey; blockKeyLookupEx = lookupBlockKeyEx(blockIndex, true); if (blockKeyLookupEx == null) { // Not found, still will have to reset key on revert blockKeyLookupEx = BlockKeyLookupEx.NOT_FOUND; oldKey = null; } else { oldKey = blockKeyLookupEx.getKey(); } if ((oldKey == null || oldKey == NrsFile.HASH_TRIMMED) && read) { // Write 0 to caller buffer, can ignore ioBlkOffset final ByteBuffer source = blockZero.duplicate(); assert source.position() == 0; assert source.limit() == blockSize; assert source.capacity() == blockSize; source.limit(ioBlkLength); writeToData(source); return null; } // Get the previous block final ByteBuffer prevBlock; if (oldKey == null || oldKey == NrsFile.HASH_TRIMMED) { // Case write: allocate a new buffer filled with 0 // TODO: really need to clear the block? assert !read; prevBlock = allocateBlock(true); } else { prevBlock = getBlock(blockIndex, oldKey, blockKeyLookupEx, read); } try { // Set block window prevBlock.position(ioBlkOffset); prevBlock.limit(ioBlkOffset + ioBlkLength); if (read) { writeToData(prevBlock); } else { // Get data and store buffer. No lock here readFromData(prevBlock); // Write the whole block. No replace if the oldKey is from a previous snapshot storeBlock(prevBlock, 0, blockIndex, blockKeyLookupEx != null && blockKeyLookupEx.isSourceCurrent() && oldKey != NrsFile.HASH_TRIMMED ? oldKey : null, txId, blockOpBuilder); } } finally { releaseBlock(prevBlock); } return null; } } /** * A {@link IoRequest} execute the {@link IoTask} and make sure that all or none are executed. * */ static final class IoRequest { /** Operation to perform */ private final IoTaskOpe ope; /** offset in the device */ private final long offset; /** IO length */ private final int length; /** Tasks to perform */ private final List<IoTask> ioTasks; private final DeviceReadWriteHandleImpl deviceReadWriteHandleImpl; /** * Create a new {@link IoRequest} * * @param ope * Operation to perform. * @param offset * offset in device * @param length * IO length * @param ioTasks * {@link IoTask} to perform. * @param deviceReadWriteHandleImpl */ IoRequest(@Nonnull final IoTaskOpe ope, final long offset, final int length, final List<IoTask> ioTasks, final DeviceReadWriteHandleImpl deviceReadWriteHandleImpl) { super(); assert offset >= 0; assert length > 0; assert ioTasks == null || ioTasks.size() > 0; // ioTasks is null in unit tests this.ope = Objects.requireNonNull(ope); this.offset = offset; this.length = length; this.ioTasks = ioTasks; this.deviceReadWriteHandleImpl = deviceReadWriteHandleImpl; } final boolean isRead() { return ope == IoTaskOpe.READ; } final boolean isWrite() { return ope == IoTaskOpe.WRITE; } final boolean isTrim() { return ope == IoTaskOpe.TRIM; } /** * Executes the {@link IoRequest}. * * @throws IOException */ final void exec() throws IOException { // Update block notifications (null for a read or a trim request) final boolean write = isWrite(); final VvrRemote.RemoteOperation.Builder blockOpBuilder = write && deviceReadWriteHandleImpl.needsBlockOpBuilder() ? VvrRemote.RemoteOperation.newBuilder() : null; final int txId = (write && ioTasks.size() > 1) ? deviceReadWriteHandleImpl.createBlockTransaction() : -1; boolean done = false; // Ready to revert on any throwable try { // Add block IO parameters if (write) { for (int i = ioTasks.size() - 1; i >= 0; i--) { final IoTask ioTask = ioTasks.get(i); ioTask.setTxId(txId); ioTask.setBlockOpBuilder(blockOpBuilder); } } // Single task in the current thread if (ioTasks.size() == 1) { try { ioTasks.get(0).call(); } catch (final IOException e) { throw e; } catch (final Exception e) { throw new IOException(e); } } else if (SINGLE_THREADED) { // Single threaded IOs try { for (int i = ioTasks.size() - 1; i >= 0; i--) { final IoTask ioTask = ioTasks.get(i); ioTask.call(); } } catch (final IOException e) { throw e; } catch (final Exception e) { throw new IOException(e); } } else { // Run all in the IO executor try { final List<Future<Void>> result = IO_EXEC.invokeAll(ioTasks); // Get exceptions, if any for (int i = result.size() - 1; i >= 0; i--) { final Future<Void> future = result.get(i); try { future.get(); } catch (final ExecutionException e) { // Create new exception to get the full stack (thread exec and current thread) throw new IOException(e.getCause()); } } } catch (CancellationException | InterruptedException e) { throw new IOException(e); } } // Block: commit changes and notify peers if (txId > 0) { deviceReadWriteHandleImpl.commitBlockTransaction(txId); done = true; } if (blockOpBuilder != null) { deviceReadWriteHandleImpl.notifyBlockIO(blockOpBuilder); } } catch (IOException | RuntimeException | Error e) { // Revert Nrs changes if (write || isTrim()) { revertNrs(); } throw e; } finally { // Rolls back block transaction on error if (txId > 0 && !done) { deviceReadWriteHandleImpl.rollbackBlockTransaction(txId); } } } /** * Revert the Nrs changes made by the {@link IoTask}s of the request. */ private final void revertNrs() { for (int i = ioTasks.size() - 1; i >= 0; i--) { final IoTask ioTask = ioTasks.get(i); try { ioTask.revertNrs(); } catch (final Exception e) { LOGGER.warn("Failed to revert IO operation", e); } } } /** * Check if some bits are in common between this and the other {@link IoRequest}. * * @param other * @return <code>true</code> if the two requests overlap */ final boolean overlap(final IoRequest other) { if (offset == other.offset) { // Both length are >0 return true; } else if (offset > other.offset) { if (offset < (other.offset + other.length)) { return true; } } else { // other.offset > offset if (other.offset < (offset + length)) { return true; } } return false; } } private static final Logger LOGGER = LoggerFactory.getLogger(DeviceReadWriteHandleImpl.class); /** ByteBufferCache to reuse {@link ByteBuffer}s, direct or non direct */ private static final ByteBufferCache BYTE_BUFFER_CACHE = new ByteBufferCache(ALLOCATE_DIRECT ? 0 : Integer.MAX_VALUE); /** Wait request timeout (in seconds) */ private static final long DO_NOT_WAIT_FOREVER = 5; /** true to make single threaded IOs */ private static final boolean SINGLE_THREADED = Boolean.getBoolean("io.eguan.vvr.io.singleThreaded"); /** Pool of blocks filled with 0. The key is the buffer size, the value is the block to read */ private static final HashMap<Integer, ByteBuffer> BLOCK_POOL = new HashMap<>(4); /** Executor to execute IO requests in parallel */ private static final ExecutorService IO_EXEC = Executors.newFixedThreadPool( Math.max(2, Runtime.getRuntime().availableProcessors()), new ThreadFactory() { private int index = 0; @Override public final Thread newThread(final Runnable r) { index++; final Thread result = new Thread(r, "IOexec-" + index); result.setPriority(Thread.NORM_PRIORITY + 3); result.setDaemon(true); return result; } }); /** Link to the Device */ protected final AbstractDeviceImplHelper deviceImplHelper; /** Hash algorithm to hash blocks. */ private final HashAlgorithm hashAlgorithm; /** True if the device is opened read-only. */ private final boolean readOnly; /** Block size of the device. Not private to avoid synthetic access from IO tasks. */ protected final int blockSize; /** Block filled with 0. Not private to avoid synthetic access from inner classes */ final ByteBuffer blockZero; /** <code>true</code> when the handle is closed */ @GuardedBy(value = "closedLock") private boolean closed = false; private final ReadWriteLock closedLock = new ReentrantReadWriteLock(); /** List of {@link IoRequest} to execute */ @GuardedBy(value = "pendingIoRequestsLock") private final List<IoRequest> pendingIoRequests = new ArrayList<>(); private final ReadWriteLock pendingIoRequestsLock = new ReentrantReadWriteLock(); protected DeviceReadWriteHandleImpl(final AbstractDeviceImplHelper deviceImplHelper, final HashAlgorithm hashAlgorithm, final boolean readOnly, final int blockSize) { this.deviceImplHelper = deviceImplHelper; this.hashAlgorithm = hashAlgorithm; this.readOnly = readOnly; this.blockSize = blockSize; // Get / create buffer filled with 0 ByteBuffer blockZeroTmp; synchronized (BLOCK_POOL) { final Integer blockSizeInteger = Integer.valueOf(blockSize); blockZeroTmp = BLOCK_POOL.get(blockSizeInteger); if (blockZeroTmp == null) { blockZeroTmp = ALLOCATE_DIRECT ? ByteBuffer.allocateDirect(blockSize) : ByteBuffer.allocate(blockSize); BLOCK_POOL.put(blockSizeInteger, blockZeroTmp); } } this.blockZero = blockZeroTmp; } /* * (non-Javadoc) * * @see io.eguan.vvr.repository.core.api.Device.ReadWriteHandle#read(io.eguan.vvr.io.ByteDataSink, * int, int, long) */ @Override public final void read(@Nonnull final ByteBuffer destination, @Nonnegative final int destinationOffset, @Nonnegative final int length, @Nonnegative final long devOffset) throws IOException { try { performIo(IoTaskOpe.READ, destination, destinationOffset, length, devOffset); } catch (IOException | RuntimeException | Error e) { LOGGER.debug("Read error", e); throw e; } } /* * (non-Javadoc) * * @see * io.eguan.vvr.repository.core.api.Device.ReadWriteHandle#write(io.eguan.vvr.io.ByteDataSource, * int, int, long) */ @Override public final void write(@Nonnull final ByteBuffer source, @Nonnegative final int sourceOffset, @Nonnegative final int length, @Nonnegative final long devOffset) throws IOException { try { if (readOnly) { throw new IOException("Read only"); } performIo(IoTaskOpe.WRITE, source, sourceOffset, length, devOffset); } catch (IOException | RuntimeException | Error e) { LOGGER.debug("Write error", e); throw e; } } /* * (non-Javadoc) * * @see * io.eguan.vvr.repository.core.api.Device.ReadWriteHandle#trim(io.eguan.vvr.io.ByteDataSource, * int, int, long) */ @Override public final void trim(@Nonnegative long lengthLong, @Nonnegative long devOffset) { try { if (readOnly) { throw new IOException("Read only"); } // Check length positive if (lengthLong < 0) { throw new IOException("Negative length=" + lengthLong); } // Handle possible overflow while (lengthLong > Integer.MAX_VALUE) { performIo(IoTaskOpe.TRIM, null, 0, Integer.MAX_VALUE, devOffset); lengthLong -= Integer.MAX_VALUE; devOffset += Integer.MAX_VALUE; } // Remaining length final int length = (int) (Integer.MAX_VALUE & lengthLong); performIo(IoTaskOpe.TRIM, null, 0, length, devOffset); } catch (IOException | RuntimeException | Error e) { LOGGER.warn("Trim error", e); } } /** * Prepare and perform read or write requests. * * @param ope * @param buffer * @param bufferOffset * @param length * @param devOffset * @throws IOException */ private final void performIo(final IoTaskOpe ope, final ByteBuffer buffer, @Nonnegative final int bufferOffset, @Nonnegative final int length, @Nonnegative final long devOffset) throws IOException { checkIoRange(ope, buffer, bufferOffset, length, devOffset); final List<IoTask> ioTasks = prepareIo(ope, buffer, bufferOffset, length, devOffset); if (ioTasks.isEmpty()) { return; } final IoRequest ioRequest = new IoRequest(ope, devOffset, length, ioTasks, this); execIoRequest(buffer, length, ioRequest); } /* * (non-Javadoc) * * @see io.eguan.vvr.repository.core.api.Device.ReadWriteHandle#getSize() */ @Override public final long getSize() { return deviceImplHelper.getSize(); } /* * (non-Javadoc) * * @see io.eguan.vvr.repository.core.api.Device.ReadWriteHandle#getBlockSize() */ @Override public final int getBlockSize() { return blockSize; } /* * (non-Javadoc) * * @see io.eguan.vvr.repository.core.api.Device.ReadWriteHandle#close() */ @Override public final void close() { // Forbid close during IO closedLock.writeLock().lock(); try { this.closed = true; } finally { closedLock.writeLock().unlock(); } } /** * Look for a block in the device and its parents. * <p> * Not private to avoid synthetic access from IO tasks. * * @param blockIndex * @param recursive * @return the block key extended lookup in the NRS or <code>null</code> * @throws IOException */ final BlockKeyLookupEx lookupBlockKeyEx(final long blockIndex, final boolean recursive) throws IOException { return deviceImplHelper.lookupBlockKeyEx(blockIndex, recursive); } /** * Create a transaction for the support of multiple block IOs. * * @return the transaction ID, strictly positive. */ protected abstract int createBlockTransaction() throws IOException; /** * Commits the transaction on blocks. * * @param txId * the id of the transaction */ protected abstract void commitBlockTransaction(int txId) throws IOException; /** * Rolls back the transaction on blocks. * * @param txId * the id of the transaction */ protected abstract void rollbackBlockTransaction(int txId) throws IOException; /** * Tells if the backing store needs the lookup of the previous key to optimize storage. * * @return <code>true</code> if the previous key should be looked-up. */ protected abstract boolean canReplaceOldKey(); /** * True if the op builder must be created to notify remote nodes. * * @return <code>true</code> if an opBuilder must be created to notify remote nodes. */ protected abstract boolean needsBlockOpBuilder(); /** * Notify peers for the update of blocks. * * @param blockOpBuilder */ protected abstract void notifyBlockIO(VvrRemote.RemoteOperation.Builder blockOpBuilder); final void storeBlock(final ByteBuffer block, final int offset, final long blockIndex, final byte[] oldKey, final int ibsTxId, final VvrRemote.RemoteOperation.Builder opBuilder) throws IbsException, IllegalArgumentException, IndexOutOfBoundsException, NullPointerException, IOException { // Compute hash on source. Must set position and limit block.position(offset); block.limit(offset + blockSize); final byte[] newKey = ByteBufferDigest.digest(hashAlgorithm, block); // Same key: ignore operation if (Arrays.equals(oldKey, newKey)) { return; } storeNewBlock(block, offset, blockIndex, newKey, oldKey, ibsTxId, opBuilder); } /** * Store the given block in the backing store. * <p> * Note: not private to avoid synthetic access from IO tasks. * * @param block * @param offset * @param blockIndex * @param oldKey * the old key for replace, may be <code>null</code> * @param txId * valid transaction ID if >0. * @param opBuilder * @throws IllegalArgumentException * @throws IndexOutOfBoundsException * @throws NullPointerException * @throws IOException */ protected abstract void storeNewBlock(final ByteBuffer block, final int offset, final long blockIndex, final byte[] newKey, final byte[] oldKey, final int txId, final VvrRemote.RemoteOperation.Builder opBuilder) throws IllegalArgumentException, IndexOutOfBoundsException, NullPointerException, IOException; /** * Trim the block in the persistence. * <p> * Note: not private to avoid synthetic access from IO tasks. * * @param blockIndex * @throws IOException */ final void trimBlock(final long blockIndex) throws IOException { // Trim key in persistence deviceImplHelper.trimBlockKey(blockIndex); } /** * Revert the previous key for the given block index. * * @param blockIndex * @param blockKeyLookupEx * @throws IOException */ final void restoreKey(final long blockIndex, @Nonnull final BlockKeyLookupEx blockKeyLookupEx) throws IOException { // Restore old key or reset entry final byte[] oldKey; if (blockKeyLookupEx == BlockKeyLookupEx.NOT_FOUND || !blockKeyLookupEx.isSourceCurrent()) { // Reset key oldKey = null; deviceImplHelper.resetBlockKey(blockIndex); } else { // Revert previous key oldKey = blockKeyLookupEx.getKey(); deviceImplHelper.writeBlockKey(blockIndex, oldKey); } } /** * Gets a {@link ByteBuffer} filled with the data associated to <code>key</code>. * * @param blockIndex * @param key * @param blockKeyLookupEx * source of the key * @param readOnly * <code>true</code> if the buffer will be only read * @return a new {@link ByteBuffer} filled with the data associated to <code>key</code>. The block may be released * if it's not read-only. * @throws InterruptedException */ protected abstract ByteBuffer getBlock(final long blockIndex, final byte[] key, final BlockKeyLookupEx blockKeyLookupEx, final boolean readOnly) throws IOException, InterruptedException; /** * Fills <code>data</code> with the contents of the block associated to <code>key</code>. * * @param blockIndex * @param key * @param data * @param dataOffset * @param blockKeyLookupEx * source of the key * @return the <code>data</code> length written in data * @throws IOException * @throws InterruptedException */ protected abstract void fillBlock(final long blockIndex, final byte[] key, final ByteBuffer data, final int dataOffset, final BlockKeyLookupEx blockKeyLookupEx) throws IOException, InterruptedException; /** * Allocate a (potentially) used block. * * @param clear * if <code>true</code>, the block is set to 0 before being returned. * @return the allocated block. */ protected final ByteBuffer allocateBlock(final boolean clear) { final ByteBuffer block = BYTE_BUFFER_CACHE.allocate(blockSize); if (clear) { final ByteBuffer source = blockZero.duplicate(); assert source.position() == 0; assert source.limit() == blockSize; assert source.capacity() == blockSize; block.put(source); assert source.position() == blockSize; assert block.position() == block.capacity(); block.clear(); } return block; } /** * Release an allocated block. * * @param block * block to release. */ protected final void releaseBlock(final ByteBuffer block) { // Do not release read-only views of ByteString if (!block.isReadOnly()) { BYTE_BUFFER_CACHE.release(block); } } /** * Check IO range in device and in buffer. * * @param data * source or destination * @param dataOffset * offset in data * @param length * IO length * @param devOffset * offset in device * @throws IOException * on failure */ private final void checkIoRange(final IoTaskOpe ope, final ByteBuffer data, final int dataOffset, final int length, final long devOffset) throws IOException { // Accept length=0 if (length < 0) { throw new IOException("Negative length=" + length); } // Positive offsets if (dataOffset < 0) { throw new IOException("Negative dataOffset=" + dataOffset); } if (devOffset < 0) { throw new IOException("Negative devOffset=" + devOffset); } // Check data overflow if (ope != IoTaskOpe.TRIM) { final int eoIo = dataOffset + length; if (eoIo > data.capacity()) { throw new IOException("Overflow, size=" + getSize() + ", end offset=" + eoIo); } } // Check device overflow { final long eoIo = devOffset + length; if (eoIo > getSize()) { throw new IOException("Overflow, size=" + getSize() + ", end offset=" + eoIo); } } } /** * Create the list of IO tasks to perform. * * @param ope * @param data * @param offset * @param length * @param devOffset * @return list of IO tasks to perform. May be empty */ private final List<IoTask> prepareIo(final IoTaskOpe ope, final ByteBuffer data, final int offset, final int length, final long devOffset) { // Something to do? if (length <= 0) { return Collections.emptyList(); } // First block: compute index and segment of data involved final ArrayList<IoTask> result = new ArrayList<>((length / blockSize) + 1); final boolean singleTask; long blockIndex = devOffset / blockSize; int ioBlkOffset = (int) (devOffset % blockSize); // length to read/write inside the first block int ioBlkLength; if ((ioBlkOffset + length) <= blockSize) { // One IO, in one block ioBlkLength = length; // Single IO: no need to prepare a revert singleTask = true; } else { // Multiple IOs: first IO on end of the first block ioBlkLength = blockSize - ioBlkOffset; // Multi IO: may have to revert write access singleTask = false; } result.add(newIoTask(data, offset, ope, blockIndex, singleTask, ioBlkOffset, ioBlkLength)); // Next blocks int ioLengthRemaining = length - ioBlkLength; int dataOffset = offset + ioBlkLength; while (ioLengthRemaining > 0) { // New block blockIndex++; ioBlkOffset = 0; ioBlkLength = ioLengthRemaining >= blockSize ? blockSize : ioLengthRemaining; result.add(newIoTask(data, dataOffset, ope, blockIndex, singleTask, ioBlkOffset, ioBlkLength)); ioLengthRemaining -= blockSize; dataOffset += blockSize; } // Check IO task list consistency assert (result.size() > 1 && !singleTask) || (result.size() == 1 && singleTask); return result; } /** * Create a task to perform the given IO operation. * * @param data * @param dataOffset * @param ope * @param blockIndex * @param singleTask * @param ioBlkOffset * @param ioBlkLength * @return IO task for the given parameters */ private final IoTask newIoTask(final ByteBuffer data, final int dataOffset, final IoTaskOpe ope, final long blockIndex, final boolean singleTask, final int ioBlkOffset, final int ioBlkLength) { if (ioBlkOffset == 0 && ioBlkLength == blockSize) { return new FullIoTask(data, dataOffset, ope, blockIndex, singleTask); } return new PartialIoTask(data, dataOffset, ope, blockIndex, singleTask, ioBlkOffset, ioBlkLength); } /** * Run the {@link IoTask} to read from or write to the given {@link ByteBuffer}. The requests are put in a queue and * the requests are executed in the FIFO order. Read requests may be executed if there is no conflict with a pending * write or trim. * * @param buffer * @param length * @param ioRequest * @throws IOException */ private final void execIoRequest(final ByteBuffer buffer, final int length, final IoRequest ioRequest) throws IOException { // Forbid close during IO closedLock.readLock().lock(); try { if (closed) { throw new IOException("Closed"); } // Lock device-related lock during IO final Lock deviceLock = deviceImplHelper.getIoLock(); deviceLock.lock(); try { // Add current request in pending queue pendingIoRequestsLock.writeLock().lock(); try { pendingIoRequests.add(ioRequest); } finally { pendingIoRequestsLock.writeLock().unlock(); } try { // Wait for execution of the request boolean waitExec = true; while (waitExec) { // Can exec? pendingIoRequestsLock.readLock().lock(); try { // First request? if (pendingIoRequests.get(0) == ioRequest) { // Ready waitExec = false; break; } final boolean isRead = ioRequest.isRead(); for (int i = 0; i < pendingIoRequests.size(); i++) { final IoRequest ioRequestTmp = pendingIoRequests.get(i); if (ioRequestTmp == ioRequest) { if (isRead) { // Can start even if not the first waitExec = false; } // No need to continue checking break; } // Read request may start if no overlap with a pending write or trim if (isRead && !ioRequestTmp.isRead()) { if (ioRequest.overlap(ioRequestTmp)) { // Cannot read break; } } } } finally { pendingIoRequestsLock.readLock().unlock(); } if (waitExec) { synchronized (pendingIoRequestsLock) { pendingIoRequestsLock.wait(DO_NOT_WAIT_FOREVER * 1000); } } } if (buffer == null) { // Trim: just exec the operation ioRequest.exec(); } else { // Task execution must not change the original position final int prevPosition = buffer.position(); ioRequest.exec(); assert prevPosition == buffer.position(); // Update position on success buffer.position(prevPosition + length); } } catch (final IOException e) { throw e; } catch (final Exception e) { // Convert any exception to IOException (Interrupted, IllegalState, ...) throw new IOException(e); } finally { // Make sure the request is removed // Add current request in pending queue pendingIoRequestsLock.writeLock().lock(); try { pendingIoRequests.remove(ioRequest); } finally { pendingIoRequestsLock.writeLock().unlock(); } // Wake-up waiters synchronized (pendingIoRequestsLock) { pendingIoRequestsLock.notifyAll(); } } } finally { deviceLock.unlock(); } } finally { closedLock.readLock().unlock(); } } }