/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.util; import java.io.Closeable; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.channels.FileChannel.MapMode; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.function.Consumer; import java.util.stream.Stream; /** * This is a buffer of bytes that is accessible only using absolute indices. It is somewhat similar to * {@link ByteBuffer} but is not restricted to the 2GB limit. * * @author Bastian Gloeckle */ public class BigByteBuffer implements Closeable { private static final int DEFAULT_MAX_SINGLE_SIZE = 100 * 1024 * 1024; // 100 MB, to facilitate parallelism. private ByteBuffer[] byteBuffers; private Lock[] byteBufferLocks; private long totalSize; private long shardSize; public BigByteBuffer(FileChannel channel, MapMode mode, Consumer<MappedByteBuffer> initializer) throws IOException { this(channel, mode, initializer, DEFAULT_MAX_SINGLE_SIZE); } public BigByteBuffer(FileChannel channel, MapMode mode, Consumer<MappedByteBuffer> initializer, int maxSingleShardSize) throws IOException { int numberOfByteBuffers = (int) (channel.size() / maxSingleShardSize); if (channel.size() % maxSingleShardSize > 0) numberOfByteBuffers++; ByteBuffer[] bufs = new ByteBuffer[numberOfByteBuffers]; for (int i = 0; i < numberOfByteBuffers; i++) { long sizeOfBuf = maxSingleShardSize; if (i == numberOfByteBuffers - 1) sizeOfBuf = (channel.size() % maxSingleShardSize != 0) ? channel.size() % maxSingleShardSize : maxSingleShardSize; MappedByteBuffer newBuf = channel.map(mode, ((long) i) * maxSingleShardSize, sizeOfBuf); if (initializer != null) initializer.accept(newBuf); bufs[i] = newBuf; } this.byteBuffers = bufs; this.totalSize = channel.size(); this.shardSize = maxSingleShardSize; this.byteBufferLocks = new Lock[byteBuffers.length]; for (int i = 0; i < byteBufferLocks.length; i++) byteBufferLocks[i] = new ReentrantLock(); } public BigByteBuffer(byte[] bytes) { this(new ByteBuffer[] { ByteBuffer.wrap(bytes) }); } public BigByteBuffer(ByteBuffer[] byteBuffers) throws IllegalArgumentException { this(byteBuffers, findShardSize(byteBuffers)); } protected BigByteBuffer(ByteBuffer[] byteBuffers, int bufferSize) { this.byteBuffers = byteBuffers; this.shardSize = bufferSize; this.totalSize = Stream.<ByteBuffer> of(byteBuffers).mapToLong(buf -> buf.limit()).sum(); this.byteBufferLocks = new Lock[byteBuffers.length]; for (int i = 0; i < byteBufferLocks.length; i++) byteBufferLocks[i] = new ReentrantLock(); } private static int findShardSize(ByteBuffer[] bufs) throws IllegalArgumentException { if (bufs.length > 1) { // ensure all buffers unless the last one have the same length // the last buffer must have a length <= the other ones. long numberOfDistinctLengths = Stream.of(bufs).limit(bufs.length - 1).mapToInt(b -> b.limit()).distinct().count(); if (numberOfDistinctLengths != 1 || bufs[bufs.length - 1].limit() > bufs[0].limit()) throw new IllegalArgumentException("The provided ByteBuffers have invalid lengths."); } return bufs[0].limit(); } /** * Get a single byte from a specific index. */ public byte get(long byteIdx) throws ArrayIndexOutOfBoundsException { if (byteIdx < 0 || byteIdx >= totalSize) throw new ArrayIndexOutOfBoundsException("Tried to access index " + byteIdx + " on buffer of size " + totalSize); int bufIdx = (int) (byteIdx / shardSize); int idx = (int) (byteIdx % shardSize); return byteBuffers[bufIdx].get(idx); } /** * Get an array of bytes from the buffer, similar to {@link InputStream#read(byte[], int, int)}. * * @param byteIdx * The index of the first byte in the buffer to read. * @return number of bytes actually read. */ public int get(long byteIdx, byte[] target, int targetOffset, int length) { if (target == null) throw new NullPointerException(); if (byteIdx == totalSize) return -1; if (byteIdx < 0 || byteIdx > totalSize || length < 0 || target.length < targetOffset + length) throw new ArrayIndexOutOfBoundsException( "Tried to access index " + byteIdx + " length " + length + " but size available is " + totalSize + ". Target arrays length is " + target.length + ", target offset " + targetOffset); if (length == 0) return 0; length = (int) Math.min(length, totalSize - byteIdx); int bufIdx = (int) (byteIdx / shardSize); int idx = (int) (byteIdx % shardSize); if (idx <= shardSize - length) { // single ByteBuffer contains result. byteBufferLocks[bufIdx].lock(); try { if (byteBuffers[bufIdx].position() != idx) byteBuffers[bufIdx].position(idx); byteBuffers[bufIdx].get(target, targetOffset, length); } finally { byteBufferLocks[bufIdx].unlock(); } } else { // multiple ByteBuffers contain result. synchronized (this) { int firstLength = (int) (shardSize - idx); byteBufferLocks[bufIdx].lock(); try { if (byteBuffers[bufIdx].position() != idx) byteBuffers[bufIdx].position(idx); byteBuffers[bufIdx].get(target, targetOffset, firstLength); } finally { byteBufferLocks[bufIdx].unlock(); } int lengthLeft = length - firstLength; int i = 1; targetOffset += firstLength; while (lengthLeft > 0) { int lengthThisBuf; if (lengthLeft <= byteBuffers[bufIdx + i].limit()) lengthThisBuf = lengthLeft; else lengthThisBuf = byteBuffers[bufIdx + i].limit(); byteBufferLocks[bufIdx + i].lock(); try { byteBuffers[bufIdx + i].rewind(); byteBuffers[bufIdx + i].get(target, targetOffset, lengthThisBuf); } finally { byteBufferLocks[bufIdx + i].unlock(); } targetOffset += lengthThisBuf; lengthLeft -= lengthThisBuf; i++; } } } return length; } public long size() { return totalSize; } @Override public void close() throws IOException { byteBuffers = null; } /** * @return A new InputStream that will return all bytes contained in this {@link BigByteBuffer}. */ public InputStream createInputStream() { return createPartialInputStream(0L, totalSize); } /** * @return A new InputStream that will return the bytes of this {@link BigByteBuffer} in the given index range. */ public InputStream createPartialInputStream(long firstIdx, long lastIdxExclusive) { if (lastIdxExclusive > totalSize) return null; return new InputStream() { private long pos = firstIdx; @Override public int read() throws IOException { if (pos >= lastIdxExclusive) return -1; return BigByteBuffer.this.get(pos++); } @Override public long skip(long n) throws IOException { long skipTarget = Math.min(pos + n, lastIdxExclusive); long skippedBytes = skipTarget - pos; pos = skipTarget; return skippedBytes; } @Override public int available() throws IOException { return (int) (lastIdxExclusive - pos); } @Override public int read(byte[] b, int off, int len) throws IOException { if (pos + len > lastIdxExclusive) { len = (int) (lastIdxExclusive - pos); if (len == 0) return -1; } int read = BigByteBuffer.this.get(pos, b, off, len); pos += read; return read; } }; } }