package org.apache.lucene.store; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.File; import java.io.FileNotFoundException; import java.io.FilenameFilter; import java.io.IOException; import java.io.RandomAccessFile; import java.util.Collection; import static java.util.Collections.synchronizedSet; import java.util.HashSet; import java.util.Set; import java.util.concurrent.Future; import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util.Constants; /** * Base class for Directory implementations that store index * files in the file system. * <a name="subclasses"/> * There are currently three core * subclasses: * * <ul> * * <li> {@link SimpleFSDirectory} is a straightforward * implementation using java.io.RandomAccessFile. * However, it has poor concurrent performance * (multiple threads will bottleneck) as it * synchronizes when multiple threads read from the * same file. * * <li> {@link NIOFSDirectory} uses java.nio's * FileChannel's positional io when reading to avoid * synchronization when reading from the same file. * Unfortunately, due to a Windows-only <a * href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265734">Sun * JRE bug</a> this is a poor choice for Windows, but * on all other platforms this is the preferred * choice. Applications using {@link Thread#interrupt()} or * {@link Future#cancel(boolean)} should use * {@link SimpleFSDirectory} instead. See {@link NIOFSDirectory} java doc * for details. * * * * <li> {@link MMapDirectory} uses memory-mapped IO when * reading. This is a good choice if you have plenty * of virtual memory relative to your index size, eg * if you are running on a 64 bit JRE, or you are * running on a 32 bit JRE but your index sizes are * small enough to fit into the virtual memory space. * Java has currently the limitation of not being able to * unmap files from user code. The files are unmapped, when GC * releases the byte buffers. Due to * <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4724038"> * this bug</a> in Sun's JRE, MMapDirectory's {@link IndexInput#close} * is unable to close the underlying OS file handle. Only when * GC finally collects the underlying objects, which could be * quite some time later, will the file handle be closed. * This will consume additional transient disk usage: on Windows, * attempts to delete or overwrite the files will result in an * exception; on other platforms, which typically have a "delete on * last close" semantics, while such operations will succeed, the bytes * are still consuming space on disk. For many applications this * limitation is not a problem (e.g. if you have plenty of disk space, * and you don't rely on overwriting files on Windows) but it's still * an important limitation to be aware of. This class supplies a * (possibly dangerous) workaround mentioned in the bug report, * which may fail on non-Sun JVMs. * * Applications using {@link Thread#interrupt()} or * {@link Future#cancel(boolean)} should use * {@link SimpleFSDirectory} instead. See {@link MMapDirectory} * java doc for details. * </ul> * * Unfortunately, because of system peculiarities, there is * no single overall best implementation. Therefore, we've * added the {@link #open} method, to allow Lucene to choose * the best FSDirectory implementation given your * environment, and the known limitations of each * implementation. For users who have no reason to prefer a * specific implementation, it's best to simply use {@link * #open}. For all others, you should instantiate the * desired implementation directly. * * <p>The locking implementation is by default {@link * NativeFSLockFactory}, but can be changed by * passing in a custom {@link LockFactory} instance. * * @see Directory */ public abstract class FSDirectory extends Directory { /** * Default read chunk size. This is a conditional default: on 32bit JVMs, it defaults to 100 MB. On 64bit JVMs, it's * <code>Integer.MAX_VALUE</code>. * * @see #setReadChunkSize */ public static final int DEFAULT_READ_CHUNK_SIZE = Constants.JRE_IS_64BIT ? Integer.MAX_VALUE : 100 * 1024 * 1024; protected final File directory; // The underlying filesystem directory protected final Set<String> staleFiles = synchronizedSet(new HashSet<String>()); // Files written, but not yet sync'ed private int chunkSize = DEFAULT_READ_CHUNK_SIZE; // LUCENE-1566 // null means no limit private volatile RateLimiter mergeWriteRateLimiter; // returns the canonical version of the directory, creating it if it doesn't exist. private static File getCanonicalPath(File file) throws IOException { return new File(file.getCanonicalPath()); } /** Create a new FSDirectory for the named location (ctor for subclasses). * @param path the path of the directory * @param lockFactory the lock factory to use, or null for the default * ({@link NativeFSLockFactory}); * @throws IOException if there is a low-level I/O error */ protected FSDirectory(File path, LockFactory lockFactory) throws IOException { // new ctors use always NativeFSLockFactory as default: if (lockFactory == null) { lockFactory = new NativeFSLockFactory(); } directory = getCanonicalPath(path); if (directory.exists() && !directory.isDirectory()) throw new NoSuchDirectoryException("file '" + directory + "' exists but is not a directory"); setLockFactory(lockFactory); } /** Creates an FSDirectory instance, trying to pick the * best implementation given the current environment. * The directory returned uses the {@link NativeFSLockFactory}. * * <p>Currently this returns {@link MMapDirectory} for most Solaris * and Windows 64-bit JREs, {@link NIOFSDirectory} for other * non-Windows JREs, and {@link SimpleFSDirectory} for other * JREs on Windows. It is highly recommended that you consult the * implementation's documentation for your platform before * using this method. * * <p><b>NOTE</b>: this method may suddenly change which * implementation is returned from release to release, in * the event that higher performance defaults become * possible; if the precise implementation is important to * your application, please instantiate it directly, * instead. For optimal performance you should consider using * {@link MMapDirectory} on 64 bit JVMs. * * <p>See <a href="#subclasses">above</a> */ public static FSDirectory open(File path) throws IOException { return open(path, null); } /** Just like {@link #open(File)}, but allows you to * also specify a custom {@link LockFactory}. */ public static FSDirectory open(File path, LockFactory lockFactory) throws IOException { if ((Constants.WINDOWS || Constants.SUN_OS || Constants.LINUX) && Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) { return new MMapDirectory(path, lockFactory); } else if (Constants.WINDOWS) { return new SimpleFSDirectory(path, lockFactory); } else { return new NIOFSDirectory(path, lockFactory); } } @Override public void setLockFactory(LockFactory lockFactory) throws IOException { super.setLockFactory(lockFactory); // for filesystem based LockFactory, delete the lockPrefix, if the locks are placed // in index dir. If no index dir is given, set ourselves if (lockFactory instanceof FSLockFactory) { final FSLockFactory lf = (FSLockFactory) lockFactory; final File dir = lf.getLockDir(); // if the lock factory has no lockDir set, use the this directory as lockDir if (dir == null) { lf.setLockDir(directory); lf.setLockPrefix(null); } else if (dir.getCanonicalPath().equals(directory.getCanonicalPath())) { lf.setLockPrefix(null); } } } /** Lists all files (not subdirectories) in the * directory. This method never returns null (throws * {@link IOException} instead). * * @throws NoSuchDirectoryException if the directory * does not exist, or does exist but is not a * directory. * @throws IOException if list() returns null */ public static String[] listAll(File dir) throws IOException { if (!dir.exists()) throw new NoSuchDirectoryException("directory '" + dir + "' does not exist"); else if (!dir.isDirectory()) throw new NoSuchDirectoryException("file '" + dir + "' exists but is not a directory"); // Exclude subdirs String[] result = dir.list(new FilenameFilter() { public boolean accept(File dir, String file) { return !new File(dir, file).isDirectory(); } }); if (result == null) throw new IOException("directory '" + dir + "' exists and is a directory, but cannot be listed: list() returned null"); return result; } /** Lists all files (not subdirectories) in the * directory. * @see #listAll(File) */ @Override public String[] listAll() throws IOException { ensureOpen(); return listAll(directory); } /** Returns true iff a file with the given name exists. */ @Override public boolean fileExists(String name) { ensureOpen(); File file = new File(directory, name); return file.exists(); } /** Returns the time the named file was last modified. */ public static long fileModified(File directory, String name) { File file = new File(directory, name); return file.lastModified(); } /** Returns the length in bytes of a file in the directory. */ @Override public long fileLength(String name) throws IOException { ensureOpen(); File file = new File(directory, name); final long len = file.length(); if (len == 0 && !file.exists()) { throw new FileNotFoundException(name); } else { return len; } } /** Removes an existing file in the directory. */ @Override public void deleteFile(String name) throws IOException { ensureOpen(); File file = new File(directory, name); if (!file.delete()) throw new IOException("Cannot delete " + file); staleFiles.remove(name); } /** Creates an IndexOutput for the file with the given name. */ @Override public IndexOutput createOutput(String name, IOContext context) throws IOException { ensureOpen(); ensureCanWrite(name); return new FSIndexOutput(this, name, context.context == IOContext.Context.MERGE ? mergeWriteRateLimiter : null); } /** Sets the maximum (approx) MB/sec allowed by all write * IO performed by merging. Pass null to have no limit. * * <p><b>NOTE</b>: if merges are already running there is * no guarantee this new rate will apply to them; it will * only apply for certain to new merges. * * @lucene.experimental */ public void setMaxMergeWriteMBPerSec(Double mbPerSec) { RateLimiter limiter = mergeWriteRateLimiter; if (mbPerSec == null) { if (limiter != null) { limiter.setMbPerSec(Double.MAX_VALUE); mergeWriteRateLimiter = null; } } else if (limiter != null) { limiter.setMbPerSec(mbPerSec); } else { mergeWriteRateLimiter = new RateLimiter(mbPerSec); } } /** * Sets the rate limiter to be used to limit (approx) MB/sec allowed * by all IO performed when merging. Pass null to have no limit. * * <p>Passing an instance of rate limiter compared to setting it using * {@link #setMaxMergeWriteMBPerSec(Double)} allows to use the same limiter * instance across several directories globally limiting IO when merging * across them. * * @lucene.experimental */ public void setMaxMergeWriteLimiter(RateLimiter mergeWriteRateLimiter) { this.mergeWriteRateLimiter = mergeWriteRateLimiter; } /** See {@link #setMaxMergeWriteMBPerSec}. * * @lucene.experimental */ public Double getMaxMergeWriteMBPerSec() { RateLimiter limiter = mergeWriteRateLimiter; return limiter == null ? null : limiter.getMbPerSec(); } protected void ensureCanWrite(String name) throws IOException { if (!directory.exists()) if (!directory.mkdirs()) throw new IOException("Cannot create directory: " + directory); File file = new File(directory, name); if (file.exists() && !file.delete()) // delete existing, if any throw new IOException("Cannot overwrite: " + file); } protected void onIndexOutputClosed(FSIndexOutput io) { staleFiles.add(io.name); } @Override public void sync(Collection<String> names) throws IOException { ensureOpen(); Set<String> toSync = new HashSet<String>(names); toSync.retainAll(staleFiles); for (String name : toSync) fsync(name); staleFiles.removeAll(toSync); } @Override public String getLockID() { ensureOpen(); String dirName; // name to be hashed try { dirName = directory.getCanonicalPath(); } catch (IOException e) { throw new RuntimeException(e.toString(), e); } int digest = 0; for(int charIDX=0;charIDX<dirName.length();charIDX++) { final char ch = dirName.charAt(charIDX); digest = 31 * digest + ch; } return "lucene-" + Integer.toHexString(digest); } /** Closes the store to future operations. */ @Override public synchronized void close() { isOpen = false; } /** @return the underlying filesystem directory */ public File getDirectory() { ensureOpen(); return directory; } /** For debug output. */ @Override public String toString() { return this.getClass().getName() + "@" + directory + " lockFactory=" + getLockFactory(); } /** * Sets the maximum number of bytes read at once from the * underlying file during {@link IndexInput#readBytes}. * The default value is {@link #DEFAULT_READ_CHUNK_SIZE}; * * <p> This was introduced due to <a * href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546">Sun * JVM Bug 6478546</a>, which throws an incorrect * OutOfMemoryError when attempting to read too many bytes * at once. It only happens on 32bit JVMs with a large * maximum heap size.</p> * * <p>Changes to this value will not impact any * already-opened {@link IndexInput}s. You should call * this before attempting to open an index on the * directory.</p> * * <p> <b>NOTE</b>: This value should be as large as * possible to reduce any possible performance impact. If * you still encounter an incorrect OutOfMemoryError, * trying lowering the chunk size.</p> */ public final void setReadChunkSize(int chunkSize) { // LUCENE-1566 if (chunkSize <= 0) { throw new IllegalArgumentException("chunkSize must be positive"); } if (!Constants.JRE_IS_64BIT) { this.chunkSize = chunkSize; } } /** * The maximum number of bytes to read at once from the * underlying file during {@link IndexInput#readBytes}. * @see #setReadChunkSize */ public final int getReadChunkSize() { // LUCENE-1566 return chunkSize; } /** Base class for reading input from a RandomAccessFile */ protected abstract static class FSIndexInput extends BufferedIndexInput { /** the underlying RandomAccessFile */ protected final RandomAccessFile file; boolean isClone = false; /** maximum read length on a 32bit JVM to prevent incorrect OOM, see LUCENE-1566 */ protected final int chunkSize; /** start offset: non-zero in the slice case */ protected final long off; /** end offset (start+length) */ protected final long end; /** Create a new FSIndexInput, reading the entire file from <code>path</code> */ protected FSIndexInput(String resourceDesc, File path, IOContext context, int chunkSize) throws IOException { super(resourceDesc, context); this.file = new RandomAccessFile(path, "r"); this.chunkSize = chunkSize; this.off = 0L; this.end = file.length(); } /** Create a new FSIndexInput, representing a slice of an existing open <code>file</code> */ protected FSIndexInput(String resourceDesc, RandomAccessFile file, long off, long length, int bufferSize, int chunkSize) { super(resourceDesc, bufferSize); this.file = file; this.chunkSize = chunkSize; this.off = off; this.end = off + length; this.isClone = true; // well, we are sorta? } @Override public void close() throws IOException { // only close the file if this is not a clone if (!isClone) { file.close(); } } @Override public FSIndexInput clone() { FSIndexInput clone = (FSIndexInput)super.clone(); clone.isClone = true; return clone; } @Override public final long length() { return end - off; } /** Method used for testing. Returns true if the underlying * file descriptor is valid. */ boolean isFDValid() throws IOException { return file.getFD().valid(); } } /** * Writes output with {@link RandomAccessFile#write(byte[], int, int)} */ protected static class FSIndexOutput extends BufferedIndexOutput { private final FSDirectory parent; private final String name; private final RandomAccessFile file; private volatile boolean isOpen; // remember if the file is open, so that we don't try to close it more than once private final RateLimiter rateLimiter; public FSIndexOutput(FSDirectory parent, String name, RateLimiter rateLimiter) throws IOException { this.parent = parent; this.name = name; file = new RandomAccessFile(new File(parent.directory, name), "rw"); isOpen = true; this.rateLimiter = rateLimiter; } /** output methods: */ @Override public void flushBuffer(byte[] b, int offset, int size) throws IOException { assert isOpen; if (rateLimiter != null) { rateLimiter.pause(size); } file.write(b, offset, size); } @Override public void close() throws IOException { parent.onIndexOutputClosed(this); // only close the file if it has not been closed yet if (isOpen) { boolean success = false; try { super.close(); success = true; } finally { isOpen = false; if (!success) { try { file.close(); } catch (Throwable t) { // Suppress so we don't mask original exception } } else { file.close(); } } } } /** Random-access methods */ @Override public void seek(long pos) throws IOException { super.seek(pos); file.seek(pos); } @Override public long length() throws IOException { return file.length(); } @Override public void setLength(long length) throws IOException { file.setLength(length); } } protected void fsync(String name) throws IOException { File fullFile = new File(directory, name); boolean success = false; int retryCount = 0; IOException exc = null; while (!success && retryCount < 5) { retryCount++; RandomAccessFile file = null; try { try { file = new RandomAccessFile(fullFile, "rw"); file.getFD().sync(); success = true; } finally { if (file != null) file.close(); } } catch (IOException ioe) { if (exc == null) exc = ioe; try { // Pause 5 msec Thread.sleep(5); } catch (InterruptedException ie) { throw new ThreadInterruptedException(ie); } } } if (!success) // Throw original exception throw exc; } }