package org.cdlib.xtf.textIndexer; /** * Copyright (c) 2004, Regents of the University of California * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the University of California nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import org.cdlib.xtf.util.*; //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// /** * This class records configuration information about the current state of * the TextIndexer application. <br><br> * * The main TextIndexer class uses this class to maintain information about its * current processing state. This information includes: <br><br> * - The path to the configuration file that defines indices to be created or * updated. <br> * - The output (trace) level to display while processing indices. <br> * - Whether or not the current index should be rebuilt from scratch (clean * build).<br> * - Whether or not the indexes should be optimized after being built.<br> * - The source text and index database directories to use for the current * index being processed. (This information is actually stored in a separate * IndexInfo sub-structure within IndexerConfig.) <br> * <br><br> */ public class IndexerConfig { /** Path of the XTF home directory */ public String xtfHomePath; /** Path to the config file. */ public String cfgFilePath; /** Trace level to output. Should be one or more of the trace constants * defined by the class org.cdlib.xtf.util.Trace . */ public int traceLevel; /** Flag indicating whether or not to build index from scratch or * incrementally. <br><br> * * true = Build index from scratch. <br> * false = Build index incrementally. <br><br> */ public boolean clean; /** Flag indicating whether or not index must still be cleaned or not. Set * to the value of clean just prior to processing each index listed in the * config file. <br><br> * * true = Build index from scratch. <br> * false = Build index incrementally. <br><br> */ public boolean mustClean; /** * Flag indicating whether or not to "force" indexing of items. * <br><br> * * true = Ignore file mod times during incremental indexing. <br> * false = Normal file mod time check. <br><br> */ public boolean force; /** Flag indicating whether to build lazy files during the indexing process. * <br><br> * * true = Build lazy files. <br> * False = Do not build lazy files. <br><br> */ public boolean buildLazyFiles; /** Flag indicating whether or not to optimize the index after building it. * <br><br> * * true = Optimize index after building. <br> * false = Do not optimize index. <br><br> */ public boolean optimize; /** * Flag indicating whether or not to build spelling dictionaries for * the index after building it. * <br><br> * * true = Build spelling dictionaries for index after building. <br> * false = Do not build spelling dictionaries index. <br><br> */ public boolean updateSpellDict; /** * Flag indicating whether or not to skip the main indexing pass. Useful * for debugging later phases, such as optimization or spelling. * <br><br> * * true = Skip the main indexing pass. <br> * false = Perform the main indexing pass. <br><br> */ public boolean skipIndexing; /** * Flag indicating whether or not to perform validation (on indexes which * are so marked). * <br><br> * * true = Validate indexes. <br> * false = Do not validate. <br><br> */ public boolean validate; /** * Flag indicating whether or not to perform rotation (on indexes which * are so marked). * <br><br> * * true = Rotate indexes. <br> * false = Do not rotate. <br><br> */ public boolean rotate; /** Index specific information for the current index being created or * updated. */ public IndexInfo indexInfo; //////////////////////////////////////////////////////////////////////////// /** Default constructor. <br><br> * * Initializes data members for this calls to reasonable default values. * <br><br> */ public IndexerConfig() { // Default to the XTF conf directory for the config file Path. cfgFilePath = "conf/textIndexer.conf"; // Default to incrementally updating the index. clean = false; // Default to not forcing (e.g. do normal file time checking) force = false; // Default to building lazy files during the run. buildLazyFiles = true; // Default to always optimizing the index. optimize = true; // Default to making spellcheck dictionary (if enabled in the index's info) updateSpellDict = true; // Default to performing the main indexing pass skipIndexing = false; // Default to always validating indexes for which it's enabled validate = true; // Default to always rotating indexes for which it's enabled rotate = true; // Set the default trace level to display errors. traceLevel = Trace.info; // Set defaults for the index info structure. indexInfo = new IndexInfo(); } // IndexerConfig() //////////////////////////////////////////////////////////////////////////// /** Processes command line arguments to set the corresponding data members * in this class. * <br><br> * * @param args A string containing the command line arguments passed to * the text indexer. * * @param startArg The character index at which to begin processing the next * command line argument. * * @return The character index at which to resume command line * argument processing the next time this function is * called. * * @.notes This function looks for the following command line * flags: <br><br> * * -config {path} (exactly one required)<br> * The path/name of the configuration file describing * the index(s) to process. <br><br> * * -clean (optional)<br> * A flag indicating that the index names that follow * should be rebuilt from scratch. If not specified, the * index names that follow will be added to or updated * incrementally. <br><br> * * -incremental (optional)<br> * A flag indicating that the index names that follow * should be added to or updated incrementally. If this * flag and the -clean flag are both omitted, incremental * updating will be used by default. <br><br> * * -index {name} (one or more required)<br> * The name of an index defined in the configuration file * to create or update. <br><br> * * -trace [errors | warnings | info | debug] (optional)<br> * Identifies the level of output the indexer should * echo back to the user. If not specified, info level * output is used as the default. <br><br> */ public int readCmdLine(String[] args, int startArg) { int i; // If there aren't any command line arguments left to check, // tell the caller we didn't find the necessary info to // continue. // if (startArg >= args.length) return -1; // Assume we haven't read the necessary arguments yet. boolean gotIdxName = false; // Start with no sub-directories selected. indexInfo.subDirs = null; // Process the command line arguments based on where we left off // last time (if there was a last time.) // for (i = startArg; i < args.length; i++) { // If we found the -config argument... if (args[i].equalsIgnoreCase("-config")) { // And there aren't any more arguments, tell the caller // that we failed to get enough info to continue. // if (++i >= args.length) return -1; // Otherwise, pickup the Path/name of config file file // to use from the next argument, and flag that we have // it. // cfgFilePath = args[i]; } // If we found the -index argument... else if (args[i].equalsIgnoreCase("-index")) { // And there aren't any more arguments, tell the caller // that we failed to get enough info to continue. // if (++i >= args.length) return -1; // Otherwise, pick up the index name, and flag that we // found it. // indexInfo.indexName = args[i]; gotIdxName = true; } // If the user wants to specify that indexing should apply only // to a specified sub-directory, record that info now. This option // can be repeated and/or mixed with "-dirList" // else if (args[i].equalsIgnoreCase("-dir")) { // If there aren't any more arguments, tell the caller // that we failed to get enough info to continue. // if (++i >= args.length) return -1; // If no dir list already, make one. if (indexInfo.subDirs == null) indexInfo.subDirs = new ArrayList<String>(); // Now add the newly specified directory. indexInfo.subDirs.add(args[i]); } // If the user wants to specify a list of sub-directories to which // indexing should only apply, record the contents of that list // now. This option can be repeated and/or mixed with "-dir". // else if (args[i].equalsIgnoreCase("-dirList")) { // If there aren't any more arguments, tell the caller // that we failed to get enough info to continue. // if (++i >= args.length) return -1; // Read in the file. The special name "-" can be used to // specify stdin. // String fileName = args[i]; try { InputStream inStream; String filePath = Path.resolveRelOrAbs(xtfHomePath, fileName); inStream = new FileInputStream(filePath); // Read each line and add it to the list. BufferedReader reader = new BufferedReader(new InputStreamReader(inStream)); while (true) { String line = reader.readLine(); if (line == null) break; String dirName = line.trim(); if (dirName.length() == 0) continue; // If no dir list already, make one. if (indexInfo.subDirs == null) indexInfo.subDirs = new ArrayList<String>(); // Now add the newly specified directory. indexInfo.subDirs.add(dirName); } reader.close(); } catch (IOException e) { Trace.error("Error reading directory list: " + e.toString()); Trace.error(""); return -1; } } // If the user asked for a clean index, flag it. else if (args[i].equalsIgnoreCase("-clean")) { clean = true; force = false; } // If the user asked for an incremental index update, flag it. else if (args[i].equalsIgnoreCase("-incremental")) { clean = false; force = false; } // If the user asked us to force indexing, flag it. else if (args[i].equalsIgnoreCase("-force")) { clean = false; force = true; } // If the user asked for optimization after build (or not), flag it. else if (args[i].equalsIgnoreCase("-optimize")) optimize = true; else if (args[i].equalsIgnoreCase("-nooptimize")) optimize = false; // If the user wants (or doesn't want) spelling update, flag it. else if (args[i].equalsIgnoreCase("-updatespell")) updateSpellDict = true; else if (args[i].equalsIgnoreCase("-noupdatespell")) updateSpellDict = false; // If the user asked for lazy files to be built (or not), flag it. else if (args[i].equalsIgnoreCase("-buildlazy")) buildLazyFiles = true; else if (args[i].equalsIgnoreCase("-nobuildlazy")) buildLazyFiles = false; // If the user asked for us to skip the main indexing pass, flag it. else if (args[i].equalsIgnoreCase("-skipindexing")) skipIndexing = true; // If the user asked us to validate or not, flag it. else if (args[i].equalsIgnoreCase("-validate")) validate = true; else if (args[i].equalsIgnoreCase("-novalidate")) validate = false; // If the user asked us to rotate or not, flag it. else if (args[i].equalsIgnoreCase("-rotate")) rotate = true; else if (args[i].equalsIgnoreCase("-norotate")) rotate = false; // If we found the -trace argument... else if (args[i].equalsIgnoreCase("-trace")) { // And there aren't any more arguments, tell the caller // that we failed to get enough info to continue. // if (++i >= args.length) return -1; // Otherwise, pickup the trace level to use. String traceLevelStr = args[i]; // Convert the trace level sting from the command line argument // into the equivalent constant. // if (traceLevelStr.equalsIgnoreCase("warnings")) traceLevel = Trace.warnings; else if (traceLevelStr.equalsIgnoreCase("info")) traceLevel = Trace.info; else if (traceLevelStr.equalsIgnoreCase("debug")) traceLevel = Trace.debug; else traceLevel = Trace.errors; } else { Trace.error("Unrecognized command-line parameter: " + args[i]); return -1; } // If we got to this point and have the index name, we're ready to go. if (gotIdxName) { i++; break; } } // for(;;) // If we didn't get the index name, bail. if (!gotIdxName) return -1; // Otherwise, return the index of the command line argument to // resume processing at the next time. // return i; } // public readCmdLine() } // class IndexerConfig