package org.cdlib.xtf.textIndexer; /** * Copyright (c) 2004, Regents of the University of California * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the University of California nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ import java.io.File; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.cdlib.xtf.textEngine.NativeFSDirectory; import org.cdlib.xtf.util.Path; import org.cdlib.xtf.util.Trace; //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// /** * This class provides a simple mechanism for optimizing Lucene indices * after new documents have been added , updated, or removed. <br><br> * * When documents are added to a Lucene index, they form a "segment" that * contains information about the location and frequency for words appearing * in the document. Optimizing a Lucene index consists of merging multiple * segments into a single large segment. Doing so speeds searching by * eliminating the need to search multiple segments and combine the results. * <br><br> * * To use this class, simply instantiate a copy, and call the * {@link IdxTreeOptimizer#processDir(File) processDir()} * method on a directory containing an index. Note that the directory passed * may also be a root directory with many index sub-directories if desired. */ public class IdxTreeOptimizer { //////////////////////////////////////////////////////////////////////////// /** * Create an <code>IdxTreeOptimizer</code> instance and call this method to * optimize one or more Lucene indices. <br><br> * * @param dir The index database directory optimize. May be a * directory containing a single index, or the root * directory of a tree containing multiple indices. * <br><br> * * @throws Exception Passes back any exceptions generated by the * {@link IdxTreeOptimizer#optimizeIndex(File) optimizeIndex()} * function, which is called for each index sub-directory * found. <br><br> * * @.notes This method also calls itself recursively to process * potential index sub-directories below the passed * directory. */ public void processDir(File dir) throws Exception { // If the file we were passed was in fact a directory... if (dir.getAbsoluteFile().isDirectory()) { // And it contains an index, optimize it. if (IndexReader.indexExists(dir.getAbsoluteFile())) optimizeIndex(dir); else { // Get the list of files it contains. String[] files = dir.getAbsoluteFile().list(); // And process each of them. for (int i = 0; i < files.length; i++) processDir(new File(dir, files[i])); } return; } // if( dir.isDirectory() ) // The current file is not a directory, so skip it. } // processDir() //////////////////////////////////////////////////////////////////////////// /** * Performs the actual work of optimizing a Lucene index. * <br><br> * * @param idxDirToOptimize The index database directory clean. This * directory must contain a single Lucene index. * <br><br> * * @throws Exception Passes back any exceptions generated by Lucene * during the opening or optimization of the * specified index. * <br><br> */ public void optimizeIndex(File idxDirToOptimize) throws Exception { // Tell what index we're working on... String path = Path.normalizePath(idxDirToOptimize.toString()); Trace.info("Index: [" + path + "] ... "); Trace.tab(); try { // Try to open the index for writing. If we fail and // throw, skip the index. // Directory dir = NativeFSDirectory.getDirectory(idxDirToOptimize); IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(), false); // Previously we were paranoid about using compound files, on the // mistaken assumption that indexes could not be modified. This is // not true... the modifications simply take place at the next merge, // which is always the case in Lucene (compound or not.) // // Thus, do not do the following: // NO NO NO: indexWriter.setUseCompoundFile( false ); // Optimize the index. indexWriter.optimize(); // Close the index. indexWriter.close(); // Indicate that we're done. Trace.more(Trace.info, "Done."); } // try( to open the specified index ) catch (Exception e) { Trace.error("*** Optimization Halted Due to Error:" + e); throw e; } Trace.untab(); } // optimizeIndex() }