package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.File;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.BufferUnderflowException;
import java.nio.channels.ClosedChannelException; // javadoc
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.security.AccessController;
import java.security.PrivilegedExceptionAction;
import java.security.PrivilegedActionException;
import java.lang.reflect.Method;
import org.apache.lucene.util.Constants;
/** File-based {@link Directory} implementation that uses
* mmap for reading, and {@link
* FSDirectory.FSIndexOutput} for writing.
*
* <p><b>NOTE</b>: memory mapping uses up a portion of the
* virtual memory address space in your process equal to the
* size of the file being mapped. Before using this class,
* be sure you have plenty of virtual address space, e.g. by
* using a 64 bit JRE, or a 32 bit JRE with indexes that are
* guaranteed to fit within the address space.
* On 32 bit platforms also consult {@link #setMaxChunkSize}
* if you have problems with mmap failing because of fragmented
* address space. If you get an OutOfMemoryError, it is recommended
* to reduce the chunk size until it works.
*
* <p>Due to <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4724038">
* this bug</a> in Sun's JRE, MMapDirectory's {@link IndexInput#close}
* is unable to close the underlying OS file handle. Only when GC
* finally collects the underlying objects, which could be quite
* some time later, will the file handle be closed.
*
* <p>This will consume additional transient disk usage: on Windows,
* attempts to delete or overwrite the files will result in an
* exception; on other platforms, which typically have a "delete on
* last close" semantics, while such operations will succeed, the bytes
* are still consuming space on disk. For many applications this
* limitation is not a problem (e.g. if you have plenty of disk space,
* and you don't rely on overwriting files on Windows) but it's still
* an important limitation to be aware of.
*
* <p>This class supplies the workaround mentioned in the bug report
* (see {@link #setUseUnmap}), which may fail on
* non-Sun JVMs. It forcefully unmaps the buffer on close by using
* an undocumented internal cleanup functionality.
* {@link #UNMAP_SUPPORTED} is <code>true</code>, if the workaround
* can be enabled (with no guarantees).
* <p>
* <b>NOTE:</b> Accessing this class either directly or
* indirectly from a thread while it's interrupted can close the
* underlying channel immediately if at the same time the thread is
* blocked on IO. The channel will remain closed and subsequent access
* to {@link MMapDirectory} will throw a {@link ClosedChannelException}.
* </p>
*/
public class MMapDirectory extends FSDirectory {
/**
 * Whether mapped buffers are forcefully unmapped when an input is closed.
 * Initialised to {@link #UNMAP_SUPPORTED}; changed via {@link #setUseUnmap}.
 */
private boolean useUnmapHack = UNMAP_SUPPORTED;
/**
 * Default maximum chunk size that the constructors pass to
 * {@link #setMaxChunkSize}: {@code 1 << 30} when {@code Constants.JRE_IS_64BIT}
 * is set, {@code 1 << 28} otherwise.
 */
public static final int DEFAULT_MAX_BUFF = Constants.JRE_IS_64BIT ? (1 << 30) : (1 << 28);
/** Base-2 logarithm of the current maximum chunk size; written by {@link #setMaxChunkSize}. */
private int chunkSizePower;
/**
 * Creates an MMapDirectory rooted at {@code path} and configures it with the
 * default maximum chunk size ({@link #DEFAULT_MAX_BUFF}).
 *
 * @param path the path of the directory
 * @param lockFactory the lock factory to use, or null for the default
 * ({@link NativeFSLockFactory})
 * @throws IOException if thrown by the {@link FSDirectory} constructor
 */
public MMapDirectory(File path, LockFactory lockFactory) throws IOException {
  super(path, lockFactory);
  // setMaxChunkSize is final, so calling it from the constructor is safe
  this.setMaxChunkSize(DEFAULT_MAX_BUFF);
}
/**
 * Creates an MMapDirectory rooted at {@code path} using the default lock
 * factory ({@link NativeFSLockFactory}) and the default maximum chunk size.
 *
 * @param path the path of the directory
 * @throws IOException if thrown by the {@link FSDirectory} constructor
 */
public MMapDirectory(File path) throws IOException {
  // a null lock factory selects the default one
  this(path, null);
}
/**
 * <code>true</code> if the classes and the accessor needed to unmap
 * mmapped files could be looked up on this platform.
 */
public static final boolean UNMAP_SUPPORTED;
static {
  boolean cleanerAvailable = false;
  try {
    // both lookups must succeed; the results themselves are not needed here
    Class.forName("sun.misc.Cleaner");
    final Class<?> directBufferClass = Class.forName("java.nio.DirectByteBuffer");
    directBufferClass.getMethod("cleaner");
    cleanerAvailable = true;
  } catch (Exception e) {
    // lookup failed: leave the workaround reported as unsupported
    cleanerAvailable = false;
  }
  UNMAP_SUPPORTED = cleanerAvailable;
}
/**
 * Enables or disables the workaround that unmaps the buffers from the
 * address space when an {@link IndexInput} is closed (the workaround
 * mentioned in the bug report). It relies on undocumented, internal
 * cleanup functionality and may fail on non-Sun JVMs.
 * <p><b>NOTE:</b> Enabling this is completely unsupported
 * by Java and may lead to JVM crashes if <code>IndexInput</code>
 * is closed while another thread is still accessing it (SIGSEGV).
 *
 * @param useUnmapHack <code>true</code> to unmap buffers on close
 * @throws IllegalArgumentException if the workaround is requested although
 * {@link #UNMAP_SUPPORTED} is <code>false</code>
 */
public void setUseUnmap(final boolean useUnmapHack) {
  final boolean requestedButUnavailable = useUnmapHack && !UNMAP_SUPPORTED;
  if (requestedButUnavailable) {
    throw new IllegalArgumentException("Unmap hack not supported on this platform!");
  }
  this.useUnmapHack = useUnmapHack;
}
/**
 * Reports whether the unmap workaround is currently enabled.
 *
 * @return <code>true</code> if buffers are unmapped on close
 * @see #setUseUnmap
 */
public boolean getUseUnmap() {
  return this.useUnmapHack;
}
/**
 * Tries to unmap the given buffer. Does nothing when the unmap workaround
 * is disabled (see {@link #setUseUnmap}); in that case, on Windows, the
 * mmapped file cannot be modified or deleted while it stays mapped.
 * <p>When enabled, the buffer's <code>cleaner()</code> accessor is invoked
 * reflectively inside a privileged action and, if it returns a non-null
 * object, that object's <code>clean()</code> method is called.
 *
 * @param buffer the mapped buffer to release
 * @throws IOException if the reflective cleanup fails with a checked
 * exception; the original exception is attached as the cause
 */
final void cleanMapping(final ByteBuffer buffer) throws IOException {
  if (!useUnmapHack) {
    return;
  }
  final PrivilegedExceptionAction<Object> unmapAction = new PrivilegedExceptionAction<Object>() {
    public Object run() throws Exception {
      final Method cleanerAccessor = buffer.getClass().getMethod("cleaner");
      cleanerAccessor.setAccessible(true);
      final Object cleaner = cleanerAccessor.invoke(buffer);
      if (cleaner == null) {
        return null;
      }
      final Method cleanMethod = cleaner.getClass().getMethod("clean");
      cleanMethod.invoke(cleaner);
      return null;
    }
  };
  try {
    AccessController.doPrivileged(unmapAction);
  } catch (PrivilegedActionException e) {
    final IOException failure = new IOException("unable to unmap the mapped buffer");
    failure.initCause(e.getCause());
    throw failure;
  }
}
/**
 * Sets the maximum chunk size used for memory mapping. The constructors
 * apply {@link #DEFAULT_MAX_BUFF}, which is {@code 1 << 30} (1 GiByte) for
 * 64 bit JVMs and {@code 1 << 28} (256 MiBytes) for 32 bit JVMs.
 * Especially on 32 bit platforms the address space can be very fragmented,
 * so large index files cannot be mapped.
 * Using a lower chunk size makes the directory implementation a little
 * bit slower (as the correct chunk may be resolved on lots of seeks)
 * but the chance is higher that mmap does not fail. On 64 bit
 * Java platforms, this parameter should always be {@code 1 << 30},
 * as the address space is big enough.
 * <b>Please note:</b> This method always rounds down the chunk size
 * to a power of 2.
 *
 * @param maxChunkSize requested maximum chunk size in bytes, must be positive
 * @throws IllegalArgumentException if {@code maxChunkSize} is not positive
 */
public final void setMaxChunkSize(final int maxChunkSize) {
  if (maxChunkSize <= 0) {
    throw new IllegalArgumentException("Maximum chunk size for mmap must be >0");
  }
  // largest power of two not exceeding the request; only its exponent is stored
  final int roundedDown = Integer.highestOneBit(maxChunkSize);
  this.chunkSizePower = Integer.numberOfTrailingZeros(roundedDown);
  assert this.chunkSizePower >= 0 && this.chunkSizePower <= 30;
}
/**
 * Returns the current mmap chunk size, i.e. two raised to the stored
 * chunk size power.
 *
 * @return the chunk size in bytes (always a power of 2)
 * @see #setMaxChunkSize
 */
public final int getMaxChunkSize() {
  return 1 << this.chunkSizePower;
}
/**
 * Creates an IndexInput for the file with the given name by memory
 * mapping it with the currently configured chunk size.
 * <p>The {@link RandomAccessFile} used for mapping is closed before this
 * method returns, whether or not the mapping succeeded.
 *
 * @param name file name, resolved against this directory
 * @param bufferSize not used by this implementation
 * @throws IOException if the file cannot be opened or mapped
 */
@Override
public IndexInput openInput(String name, int bufferSize) throws IOException {
  ensureOpen();
  final File file = new File(getDirectory(), name);
  final RandomAccessFile handle = new RandomAccessFile(file, "r");
  try {
    final String description = "MMapIndexInput(path=\"" + file + "\")";
    return new MMapIndexInput(description, handle, chunkSizePower);
  } finally {
    handle.close();
  }
}
// Because Java's ByteBuffer uses an int to address the
// values, it's necessary to access a file >
// Integer.MAX_VALUE in size using multiple byte buffers.
private final class MMapIndexInput extends IndexInput {
// Mapped chunks of the file in file order; set to null by close().
private ByteBuffer[] buffers;
// length: file length read from the RandomAccessFile in the constructor.
// chunkSize: 1L << chunkSizePower; chunkSizeMask: chunkSize - 1, used to
// extract the offset inside a chunk from an absolute position.
private final long length, chunkSizeMask, chunkSize;
// Base-2 logarithm of the chunk size; absolute positions are shifted by
// this amount to obtain the chunk index.
private final int chunkSizePower;
// Index into buffers of the chunk currently being read.
private int curBufIndex;
private ByteBuffer curBuf; // redundant for speed: buffers[curBufIndex]
// Set to true on instances produced by clone(); close() does not unmap for clones.
private boolean isClone = false;
/**
 * Maps the whole file read-only as a sequence of chunks of at most
 * {@code 1 << chunkSizePower} bytes and positions the input at offset 0.
 *
 * @param resourceDescription description passed to the superclass constructor
 * @param raf open file to map; its length and channel are read here
 * @param chunkSizePower base-2 logarithm of the chunk size, 0..30
 * @throws IllegalArgumentException if {@code chunkSizePower} is out of range
 * or the file would need too many chunks
 * @throws IOException if reading the length or mapping fails
 */
MMapIndexInput(String resourceDescription, RandomAccessFile raf, int chunkSizePower) throws IOException {
  super(resourceDescription);
  this.length = raf.length();
  this.chunkSizePower = chunkSizePower;
  this.chunkSize = 1L << chunkSizePower;
  this.chunkSizeMask = chunkSize - 1L;

  final boolean powerOutOfRange = chunkSizePower < 0 || chunkSizePower > 30;
  if (powerOutOfRange) {
    throw new IllegalArgumentException("Invalid chunkSizePower used for ByteBuffer size: " + chunkSizePower);
  }
  final long fullChunks = length >>> chunkSizePower;
  if (fullChunks >= Integer.MAX_VALUE) {
    throw new IllegalArgumentException("RandomAccessFile too big for chunk size: " + raf.toString());
  }

  // we always allocate one more buffer, the last one may be a 0 byte one
  final int nrBuffers = (int) fullChunks + 1;
  final ByteBuffer[] mapped = new ByteBuffer[nrBuffers];
  final FileChannel channel = raf.getChannel();
  long offset = 0L;
  for (int i = 0; i < nrBuffers; i++) {
    // every chunk is full-sized except the last, which takes whatever is left
    final long remaining = length - offset;
    final int size = (int) (remaining > chunkSize ? chunkSize : remaining);
    mapped[i] = channel.map(MapMode.READ_ONLY, offset, size);
    offset += size;
  }
  this.buffers = mapped;
  seek(0L);
}
/**
 * Reads the next byte. The fast path reads from the current chunk; when
 * that chunk is exhausted the input advances to the next non-empty chunk.
 * <p>The advance is committed only once a readable chunk has been found,
 * so a failed read past EOF leaves the current chunk and index untouched
 * and {@link #getFilePointer()} keeps reporting a position inside the file.
 *
 * @return the byte at the current position
 * @throws IOException if there is no further byte ("read past EOF")
 */
@Override
public byte readByte() throws IOException {
  try {
    return curBuf.get();
  } catch (BufferUnderflowException e) {
    // current chunk exhausted: look for the next chunk that has data,
    // working on local copies so that an EOF does not corrupt the state
    int nextIndex = curBufIndex;
    ByteBuffer next;
    do {
      nextIndex++;
      if (nextIndex >= buffers.length) {
        throw new IOException("read past EOF: " + this);
      }
      next = buffers[nextIndex];
      next.position(0);
    } while (!next.hasRemaining());
    curBufIndex = nextIndex;
    curBuf = next;
    return next.get();
  }
}
/**
 * Reads {@code len} bytes into {@code b} starting at {@code offset}.
 * The fast path is a single bulk read from the current chunk; if the
 * request spans chunks, each chunk is drained in turn.
 * <p>When the file ends before {@code len} bytes were read, the bytes that
 * were available have already been copied and consumed; the input is left
 * on the last chunk (the chunk index is never moved past the end), so
 * {@link #getFilePointer()} stays within the file.
 *
 * @param b destination array
 * @param offset first index in {@code b} to write to
 * @param len number of bytes to read
 * @throws IOException if fewer than {@code len} bytes remain ("read past EOF")
 */
@Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
  try {
    curBuf.get(b, offset, len);
  } catch (BufferUnderflowException e) {
    // the bulk get transferred nothing; copy chunk by chunk instead
    int curAvail = curBuf.remaining();
    while (len > curAvail) {
      curBuf.get(b, offset, curAvail);
      len -= curAvail;
      offset += curAvail;
      // check before advancing so that EOF leaves curBufIndex in range
      final int nextIndex = curBufIndex + 1;
      if (nextIndex >= buffers.length) {
        throw new IOException("read past EOF: " + this);
      }
      curBufIndex = nextIndex;
      curBuf = buffers[nextIndex];
      curBuf.position(0);
      curAvail = curBuf.remaining();
    }
    curBuf.get(b, offset, len);
  }
}
/**
 * Reads an int directly from the current chunk when it holds enough bytes;
 * otherwise delegates to the inherited implementation
 * (presumably byte-wise via {@link #readByte()}, which can cross chunks).
 *
 * @throws IOException if thrown by the inherited implementation
 */
@Override
public int readInt() throws IOException {
  final ByteBuffer chunk = curBuf;
  try {
    return chunk.getInt();
  } catch (BufferUnderflowException spansChunks) {
    // fewer than 4 bytes left in this chunk; the failed getInt consumed nothing
    return super.readInt();
  }
}
/**
 * Reads a long directly from the current chunk when it holds enough bytes;
 * otherwise delegates to the inherited implementation
 * (presumably byte-wise via {@link #readByte()}, which can cross chunks).
 *
 * @throws IOException if thrown by the inherited implementation
 */
@Override
public long readLong() throws IOException {
  final ByteBuffer chunk = curBuf;
  try {
    return chunk.getLong();
  } catch (BufferUnderflowException spansChunks) {
    // fewer than 8 bytes left in this chunk; the failed getLong consumed nothing
    return super.readLong();
  }
}
/**
 * Returns the absolute position: the start offset of the current chunk
 * plus the read position inside that chunk.
 */
@Override
public long getFilePointer() {
  final long chunkStart = ((long) curBufIndex) << chunkSizePower;
  return chunkStart + curBuf.position();
}
/**
 * Positions this input at the absolute offset {@code pos}.
 * <p>The position is validated against the file length before the chunk
 * index is computed. Relying on array-bounds failures alone is not enough:
 * the chunk index is narrowed to {@code int}, so a very large (or very
 * negative) position could wrap around to a valid index and be accepted.
 * State is only modified once the target chunk has been positioned.
 *
 * @param pos absolute position, {@code 0 <= pos <= length()}
 * @throws AlreadyClosedException if this input has been closed
 * @throws IllegalArgumentException if {@code pos} is negative
 * @throws IOException if {@code pos} lies beyond the end of the file
 */
@Override
public void seek(long pos) throws IOException {
  final ByteBuffer[] chunks = buffers;
  if (chunks == null) {
    throw new AlreadyClosedException("MMapIndexInput already closed: " + this);
  }
  if (pos < 0L) {
    throw new IllegalArgumentException("Seeking to negative position: " + this);
  }
  if (pos > length) {
    throw new IOException("seek past EOF: " + this);
  }
  // 0 <= pos <= length, so the index is within the array and the offset
  // is within the chunk's limit (the last chunk may be empty when the
  // length is an exact multiple of the chunk size)
  final int bi = (int) (pos >> chunkSizePower);
  final ByteBuffer b = chunks[bi];
  b.position((int) (pos & chunkSizeMask));
  this.curBufIndex = bi;
  this.curBuf = b;
}
/** Returns the file length that was recorded when this input was created. */
@Override
public long length() {
  return this.length;
}
/**
 * Creates an independent reader over the same mapped memory. Every chunk
 * is duplicated, so the clone has its own positions, and the clone is
 * positioned at this input's current file pointer. The clone is flagged
 * so that closing it does not unmap anything.
 *
 * @throws AlreadyClosedException if this input has been closed
 */
@Override
public Object clone() {
  final ByteBuffer[] source = buffers;
  if (source == null) {
    throw new AlreadyClosedException("MMapIndexInput already closed: " + this);
  }
  final MMapIndexInput copy = (MMapIndexInput) super.clone();
  copy.isClone = true;
  // Since most clones will use only one buffer, duplicate() could also be
  // done lazily in clones, e.g. when adapting curBuf.
  final ByteBuffer[] duplicates = new ByteBuffer[source.length];
  int slot = 0;
  for (final ByteBuffer chunk : source) {
    duplicates[slot] = chunk.duplicate();
    slot++;
  }
  copy.buffers = duplicates;
  try {
    copy.seek(getFilePointer());
  } catch (IOException ioe) {
    throw new RuntimeException("Should never happen: " + this, ioe);
  }
  return copy;
}
/**
 * Closes this input. All references to the mapped chunks, including the
 * cached current chunk, are dropped first, so that this object can no
 * longer reach memory that is about to be unmapped and does not keep the
 * mappings reachable for the garbage collector.
 * <p>For the original (non-clone) input every chunk is then handed to
 * {@code cleanMapping}. A failure on one chunk does not stop the
 * remaining chunks from being processed; the first failure is rethrown
 * at the end. Clones and already closed inputs unmap nothing.
 *
 * @throws IOException the first failure reported while unmapping
 */
@Override
public void close() throws IOException {
  final ByteBuffer[] chunks = buffers;
  buffers = null;
  curBuf = null;
  if (isClone || chunks == null) {
    return;
  }
  IOException firstFailure = null;
  for (int bufNr = 0; bufNr < chunks.length; bufNr++) {
    // unmap the buffer (if enabled) and at least unset it for GC
    try {
      cleanMapping(chunks[bufNr]);
    } catch (IOException ioe) {
      if (firstFailure == null) {
        firstFailure = ioe;
      }
    } finally {
      chunks[bufNr] = null;
    }
  }
  if (firstFailure != null) {
    throw firstFailure;
  }
}
}
}