package org.wyona.yarep.impl.search.lucene; import java.io.File; import org.apache.avalon.framework.configuration.Configuration; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.tika.config.TikaConfig; import org.wyona.commons.io.FileUtil; import org.wyona.yarep.core.Repository; import org.wyona.yarep.core.RepositoryException; import org.wyona.yarep.core.search.Indexer; import org.wyona.yarep.core.search.SearchException; /** * Yarep Lucene configuration for Lucene indexer and searcher implementation */ public class LuceneConfig { private static Logger log = LogManager.getLogger(LuceneConfig.class); private File fulltextSearchIndexFile = null; private File propertiesSearchIndexFile = null; private Analyzer fulltextAnalyzer = null; private Analyzer propertyAnalyzer = null; private String FULLTEXT_INDEX_DIR = "fulltext"; private String PROPERTIES_INDEX_DIR = "properties"; private TikaConfig tikaConfig; private long writeLockTimeout = 0; private Repository repo; private boolean indexRevisions = false; private Indexer indexer = null; /** * */ public LuceneConfig(Configuration searchIndexConfig, String configParent, Repository repo) throws SearchException { this.repo = repo; configure(searchIndexConfig, configParent); } /** * Lucene specific configuration * @param searchConfig * @param configParent */ void configure(Configuration searchConfig, String configParent) throws SearchException { try { if (searchConfig != null) { if(searchConfig.getNamespace() == "" || searchConfig.getNamespace() == null) { deprecatedConfigure(searchConfig, configParent); return; } File searchIndexSrcFile = new File(searchConfig.getChild("index-location").getAttribute("file", "index")); if (!searchIndexSrcFile.isAbsolute()) { searchIndexSrcFile = FileUtil.file(configParent, searchIndexSrcFile.toString()); } Configuration luceneConfig = searchConfig.getChild("lucene"); indexRevisions = luceneConfig.getAttributeAsBoolean("index-revisions", false); String fulltextAnalyzerClass = luceneConfig.getChild("fulltext-analyzer").getAttribute("class","org.apache.lucene.analysis.standard.StandardAnalyzer"); fulltextAnalyzer = (Analyzer) Class.forName(fulltextAnalyzerClass).newInstance(); // TODO: For search within properties the WhitespaceAnalyzer is used because the StandardAnalyzer doesn't accept resp. misinterprets escaped query strings, e.g. 03\:07\- ... String propertyAnalyzerClass = luceneConfig.getChild("property-analyzer").getAttribute("class","org.apache.lucene.analysis.WhitespaceAnalyzer"); propertyAnalyzer = (Analyzer) Class.forName(propertyAnalyzerClass).newInstance(); fulltextSearchIndexFile = new File(searchIndexSrcFile, FULLTEXT_INDEX_DIR); // Because of backwards compatibility the source directory is used as fulltext directory if (!fulltextSearchIndexFile.isDirectory() && searchIndexSrcFile.exists()) { log.warn("Because '" + fulltextSearchIndexFile + "' does not exist, the source directory is used as fulltext directory: " + searchIndexSrcFile); fulltextSearchIndexFile = searchIndexSrcFile; } if (!fulltextSearchIndexFile.exists()) { log.warn("No such 'fulltext' search index path: " + fulltextSearchIndexFile); } else { log.info("Fulltext search index path: " + fulltextSearchIndexFile); } // Create a lucene search index if it doesn't exist yet // IMPORTANT: This doesn't work within a clustered environment, because the cluster node starting first will lock the index and all other nodes will not be able to startup! //this.indexWriter = createIndexWriter(fulltextSearchIndexFile, analyzer); String localTikaConfigSrc = luceneConfig.getChild("local-tika-config").getAttribute("file", null); if (localTikaConfigSrc != null) { File localTikaConfigFile = new File(localTikaConfigSrc); if (!localTikaConfigFile.isAbsolute()) { localTikaConfigFile = FileUtil.file(configParent, localTikaConfigFile.toString()); } if (localTikaConfigFile.isFile()) { log.warn("Use local tika config: " + localTikaConfigFile.getAbsolutePath()); tikaConfig = new TikaConfig(localTikaConfigFile); } else { log.error("No such file: " + localTikaConfigFile + " (Default tika config will be used)"); tikaConfig = TikaConfig.getDefaultConfig(); } } else { log.info("Use default tika config"); tikaConfig = TikaConfig.getDefaultConfig(); } // Create properties index dir subdirectory in order to save the lucene index for searching on properties propertiesSearchIndexFile = new File(searchIndexSrcFile, PROPERTIES_INDEX_DIR); if (!propertiesSearchIndexFile.exists()) { log.warn("No such 'properties' search index path: " + propertiesSearchIndexFile); } else { log.info("Properties search index path: " + propertiesSearchIndexFile); } // IMPORTANT: This doesn't work within a clustered environment, because the cluster node starting first will lock the index and all other nodes will not be able to startup! //this.propertiesIndexWriter = createIndexWriter(propertiesSearchIndexFile, whitespaceAnalyzer); if (luceneConfig.getChild("write-lock-timeout", false) != null) { writeLockTimeout = luceneConfig.getChild("write-lock-timeout").getAttributeAsLong("ms"); } else { writeLockTimeout = 1001; // INFO: 1001 milliseconds log.warn("No 'write.lock' timeout configured, hence use hard-coded value: " + writeLockTimeout); } } else { log.warn("No search index dir (<search-index src=\"...\"/>) configured within: " + configParent); } } catch (Exception e) { log.error(e.toString()); throw new SearchException("Could not read repository configuration: " + e.getMessage(), e); } } /** * @deprecated * @param searchIndexConfig * @param configParent */ public void deprecatedConfigure(Configuration searchIndexConfig, String configParent) throws SearchException { log.warn("DEPRECATED: This config schema is deprecated (" + repo.getConfigFile() + ")! Use the new schema described at http://svn.wyona.com/repos/public/yarep/trunk/src/test/repository/new-vfs-example/repository.xml"); try { if (searchIndexConfig != null) { File searchIndexSrcFile = new File(searchIndexConfig.getAttribute("src", "index")); if (!searchIndexSrcFile.isAbsolute()) { searchIndexSrcFile = FileUtil.file(configParent, searchIndexSrcFile.toString()); } boolean isFulltextIndexingEnabled = searchIndexConfig.getAttributeAsBoolean( "index-fulltext", true); boolean isPropertyIndexingEnabled = searchIndexConfig.getAttributeAsBoolean( "index-properties", true); fulltextAnalyzer = new StandardAnalyzer(); // TODO: For search within properties the WhitespaceAnalyzer is used because the StandardAnalyzer doesn't accept resp. misinterprets escaped query strings, e.g. 03\:07\- ... propertyAnalyzer = new WhitespaceAnalyzer(); indexer = new LuceneIndexer(); fulltextSearchIndexFile = new File(searchIndexSrcFile, FULLTEXT_INDEX_DIR); if (!fulltextSearchIndexFile.isDirectory() && searchIndexSrcFile.exists()) { fulltextSearchIndexFile = searchIndexSrcFile; } log.info("Fulltext search index path: " + fulltextSearchIndexFile); // Create a lucene search index if it doesn't exist yet // IMPORTANT: This doesn't work within a clustered environment, because the cluster node starting first will lock the index and all other nodes will not be able to startup! //this.indexWriter = createIndexWriter(fulltextSearchIndexFile, analyzer); String localTikaConfigSrc = searchIndexConfig.getAttribute("local-tika-config", null); if (localTikaConfigSrc != null) { File localTikaConfigFile = new File(localTikaConfigSrc); if (!localTikaConfigFile.isAbsolute()) { localTikaConfigFile = FileUtil.file(configParent, localTikaConfigFile.toString()); } if (localTikaConfigFile.isFile()) { log.warn("Use local tika config: " + localTikaConfigFile.getAbsolutePath()); tikaConfig = new TikaConfig(localTikaConfigFile); } else { log.error("No such file: " + localTikaConfigFile + " (Default tika config will be used)"); tikaConfig = TikaConfig.getDefaultConfig(); } } else { log.info("Use default tika config"); tikaConfig = TikaConfig.getDefaultConfig(); } // Create properties index dir subdirectory in order to save the lucene index for searching on properties propertiesSearchIndexFile = new File(searchIndexSrcFile, PROPERTIES_INDEX_DIR); log.warn("Properties search index path: " + propertiesSearchIndexFile); // IMPORTANT: This doesn't work within a clustered environment, because the cluster node starting first will lock the index and all other nodes will not be able to startup! //this.propertiesIndexWriter = createIndexWriter(propertiesSearchIndexFile, whitespaceAnalyzer); writeLockTimeout = 1002; //searchIndexConfig.getAttribute("write-lock-timeout", 14); log.warn("The write lock timeout is hardcoded: " + writeLockTimeout); } else { log.warn("No search index dir (<search-index src=\"...\"/>) configured."); } } catch (Exception e) { log.error("Could not read configuration", e); } } public File getFulltextSearchIndexFile() { return fulltextSearchIndexFile; } public File getPropertiesSearchIndexFile() { return propertiesSearchIndexFile; } public Analyzer getFulltextAnalyzer() { return fulltextAnalyzer; } public Analyzer getPropertyAnalyzer() { return propertyAnalyzer; } public String getFULLTEXT_INDEX_DIR() { return FULLTEXT_INDEX_DIR; } public String getPROPERTIES_INDEX_DIR() { return PROPERTIES_INDEX_DIR; } public Indexer getIndexer() { return indexer; } public TikaConfig getTikaConfig() { return tikaConfig; } /** * Get write lock timeout */ public long getWriteLockTimeout() { //log.debug("Configured timeout: " + writeLockTimeout); return writeLockTimeout; } /** * Get repository which contains the content for which this index has been created */ public Repository getRepo() { return repo; } /** * Check whether revisions should be indexed */ public boolean doIndexRevisions() { return indexRevisions; } }