/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.exoplatform.services.jcr.impl.core.query.lucene; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Version; import org.exoplatform.services.jcr.impl.core.query.IndexerIoMode; import org.exoplatform.services.jcr.impl.core.query.IndexerIoModeHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.OutputStream; import java.io.PrintStream; import java.util.BitSet; /** * Implements common functionality for a lucene index. * <br> * Note on synchronization: This class is not entirely thread-safe. Certain * concurrent access is however allowed. Read-only access on this index using * {@link #getReadOnlyIndexReader()} is thread-safe. That is, multiple threads * my call that method concurrently and use the returned IndexReader at the same * time.<br> * Modifying threads must be synchronized externally in a way that only one * thread is using the returned IndexReader and IndexWriter instances returned * by {@link #getIndexReader()} and {@link #getIndexWriter()} at a time.<br> * Concurrent access by <b>one</b> modifying thread and multiple read-only * threads is safe! */ abstract class AbstractIndex { /** The logger instance for this class */ private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.AbstractIndex"); /** PrintStream that pipes all calls to println(String) into log.info() */ private static final LoggingPrintStream STREAM_LOGGER = new LoggingPrintStream(); /** The currently set IndexWriter or <code>null</code> if none is set */ private IndexWriter indexWriter; /** The currently set IndexReader or <code>null</code> if none is set */ private CommittableIndexReader indexReader; /** The underlying Directory where the index is stored */ private Directory directory; /** Analyzer we use to tokenize text */ private Analyzer analyzer; /** The similarity in use for indexing and searching. */ private final Similarity similarity; /** Compound file flag */ private boolean useCompoundFile = true; /** maxFieldLength config parameter */ private int maxFieldLength = SearchIndex.DEFAULT_MAX_FIELD_LENGTH; /** termInfosIndexDivisor config parameter */ private int termInfosIndexDivisor = SearchIndex.DEFAULT_TERM_INFOS_INDEX_DIVISOR; /** * The document number cache if this index may use one. */ private DocNumberCache cache; /** The shared IndexReader for all read-only IndexReaders */ private SharedIndexReader sharedReader; /** * The most recent read-only reader if there is any. */ private ReadOnlyIndexReader readOnlyReader; /** * Flag that indicates whether there was an index present in the directory * when this AbstractIndex was created. */ private boolean isExisting; protected final IndexerIoModeHandler modeHandler; /** * Constructs an index with an <code>analyzer</code> and a * <code>directory</code>. * * @param analyzer the analyzer for text tokenizing. * @param similarity the similarity implementation. * @param directory the underlying directory. * @param cache the document number cache if this index should use * one; otherwise <code>cache</code> is * <code>null</code>. * @throws IOException if the index cannot be initialized. */ AbstractIndex(final Analyzer analyzer, Similarity similarity, final Directory directory, DocNumberCache cache, IndexerIoModeHandler modeHandler) throws IOException { this.analyzer = analyzer; this.similarity = similarity; this.directory = directory; this.cache = cache; this.modeHandler = modeHandler; AbstractIndex.this.isExisting = IndexReader.indexExists(directory); if (!isExisting) { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer); indexWriter = new IndexWriter(directory, config); // immediately close, now that index has been created indexWriter.close(); indexWriter = null; } } /** * Default implementation returns the same instance as passed * in the constructor. * * @return the directory instance passed in the constructor */ Directory getDirectory() { return directory; } /** * Returns <code>true</code> if this index was opened on a directory with * an existing index in it; <code>false</code> otherwise. * * @return <code>true</code> if there was an index present when this index * was created; <code>false</code> otherwise. */ boolean isExisting() { return isExisting; } /** * Adds documents to this index and invalidates the shared reader. * * @param docs the documents to add. * @throws IOException if an error occurs while writing to the index. */ void addDocuments(final Document[] docs) throws IOException { final IndexWriter writer = getIndexWriter(); IOException ioExc = null; try { for (Document doc : docs) { try { writer.addDocument(doc); } catch (Throwable e) //NOSONAR { if (ioExc == null) { if (e instanceof IOException) { ioExc = (IOException)e; } else { ioExc = Util.createIOException(e); } } log.warn("Exception while inverting document", e); } } } finally { invalidateSharedReader(); } if (ioExc != null) { throw ioExc; } } /** * Removes the document from this index. This call will not invalidate * the shared reader. If a subclass wishes to do so, it should overwrite * this method and call {@link #invalidateSharedReader()}. * * @param idTerm the id term of the document to remove. * @throws IOException if an error occurs while removing the document. * @return number of documents deleted */ int removeDocument(final Term idTerm) throws IOException { return getIndexReader().deleteDocuments(idTerm); } /** * Returns an <code>IndexReader</code> on this index. This index reader * may be used to delete documents. * * @return an <code>IndexReader</code> on this index. * @throws IOException if the reader cannot be obtained. */ protected synchronized CommittableIndexReader getIndexReader() throws IOException { if (indexWriter != null) { indexWriter.close(); log.debug("closing IndexWriter."); indexWriter = null; } if (indexReader == null || !indexReader.isCurrent()) { IndexReader reader = IndexReader.open(getDirectory(), null, false, termInfosIndexDivisor); // if modeHandler != null and mode==READ_ONLY, then reader should be with transient deletions. // This is used to transiently update reader in clustered environment when some documents have // been deleted. If index reader not null and already contains some transient deletions, but it // is no more current, it will be re-created loosing deletions. They will already be applied by // coordinator node in the cluster. And there is no need to inject them into the new reader indexReader = new CommittableIndexReader(reader, modeHandler != null && modeHandler.getMode() == IndexerIoMode.READ_ONLY); } return indexReader; } /** * Returns a read-only index reader, that can be used concurrently with * other threads writing to this index. The returned index reader is * read-only, that is, any attempt to delete a document from the index * will throw an <code>UnsupportedOperationException</code>. * * @param initCache if the caches in the index reader should be initialized * before the index reader is returned. * @return a read-only index reader. * @throws IOException if an error occurs while obtaining the index reader. */ synchronized ReadOnlyIndexReader getReadOnlyIndexReader(final boolean initCache) throws IOException { // get current modifiable index reader CommittableIndexReader modifiableReader = getIndexReader(); long modCount = modifiableReader.getModificationCount(); if (readOnlyReader != null) { if (readOnlyReader.getDeletedDocsVersion() == modCount) { // reader up-to-date readOnlyReader.acquire(); return readOnlyReader; } else { // reader outdated if (readOnlyReader.getRefCounter() == 1) { // not in use, except by this index // update the reader readOnlyReader.updateDeletedDocs(modifiableReader); readOnlyReader.acquire(); return readOnlyReader; } else { // cannot update reader, it is still in use // need to create a new instance readOnlyReader.release(); readOnlyReader = null; } } } // if we get here there is no up-to-date read-only reader // capture snapshot of deleted documents BitSet deleted = new BitSet(modifiableReader.maxDoc()); for (int i = 0; i < modifiableReader.maxDoc(); i++) { if (modifiableReader.isDeleted(i)) { deleted.set(i); } } if (sharedReader == null) { // create new shared reader IndexReader reader = IndexReader.open(getDirectory(), termInfosIndexDivisor); CachingIndexReader cr = new CachingIndexReader(reader, cache, initCache); sharedReader = new SharedIndexReader(cr); } readOnlyReader = new ReadOnlyIndexReader(sharedReader, deleted, modCount); readOnlyReader.acquire(); return readOnlyReader; } /** * Returns a read-only index reader, that can be used concurrently with * other threads writing to this index. The returned index reader is * read-only, that is, any attempt to delete a document from the index * will throw an <code>UnsupportedOperationException</code>. * * @return a read-only index reader. * @throws IOException if an error occurs while obtaining the index reader. */ protected ReadOnlyIndexReader getReadOnlyIndexReader() throws IOException { return getReadOnlyIndexReader(false); } /** * Returns an <code>IndexWriter</code> on this index. * @return an <code>IndexWriter</code> on this index. * @throws IOException if the writer cannot be obtained. */ protected synchronized IndexWriter getIndexWriter() throws IOException { if (indexReader != null) { indexReader.close(); log.debug("closing IndexReader."); indexReader = null; } if (indexWriter == null) { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer); config.setSimilarity(similarity); if (config.getMergePolicy() instanceof LogMergePolicy) { ((LogMergePolicy)config.getMergePolicy()).setUseCompoundFile(useCompoundFile); } else if (config.getMergePolicy() instanceof TieredMergePolicy) { ((TieredMergePolicy)config.getMergePolicy()).setUseCompoundFile(useCompoundFile); } else { log.error("Can't set \"UseCompoundFile\". Merge policy is not an instance of LogMergePolicy. "); } indexWriter = new IndexWriter(directory, config); setUseCompoundFile(useCompoundFile); indexWriter.setInfoStream(STREAM_LOGGER); } return indexWriter; } /** * Commits all pending changes to the underlying <code>Directory</code>. * @throws IOException if an error occurs while commiting changes. */ protected void commit() throws IOException { commit(false); } /** * Commits all pending changes to the underlying <code>Directory</code>. * * @param optimize if <code>true</code> the index is optimized after the * commit. * @throws IOException if an error occurs while commiting changes. */ protected synchronized void commit(final boolean optimize) throws IOException { if (indexReader != null) { log.debug("committing IndexReader."); indexReader.flush(); } if (indexWriter != null) { log.debug("committing IndexWriter."); indexWriter.commit(); } // optimize if requested if (optimize) { IndexWriter writer = getIndexWriter(); writer.forceMerge(1, true); writer.close(); indexWriter = null; } } /** * Closes this index, releasing all held resources. */ synchronized void close() { releaseWriterAndReaders(); if (directory != null) { try { directory.close(); } catch (IOException e) { directory = null; } } } /** * Releases all potentially held index writer and readers. */ protected void releaseWriterAndReaders() { if (indexWriter != null) { try { indexWriter.close(); } catch (IOException e) { log.warn("Exception closing index writer: " + e.toString()); } indexWriter = null; } if (indexReader != null) { try { indexReader.close(); } catch (IOException e) { log.warn("Exception closing index reader: " + e.toString()); } indexReader = null; } if (readOnlyReader != null) { try { readOnlyReader.release(); } catch (IOException e) { log.warn("Exception closing index reader: " + e.toString()); } readOnlyReader = null; } if (sharedReader != null) { try { sharedReader.release(); } catch (IOException e) { log.warn("Exception closing index reader: " + e.toString()); } sharedReader = null; } } /** * @return the number of bytes this index occupies in memory. */ synchronized long getRamSizeInBytes() { if (indexWriter != null) { return indexWriter.ramSizeInBytes(); } else { return 0; } } /** * Closes the shared reader. * * @throws IOException if an error occurs while closing the reader. */ protected synchronized void invalidateSharedReader() throws IOException { // also close the read-only reader if (readOnlyReader != null) { readOnlyReader.release(); readOnlyReader = null; } // invalidate shared reader if (sharedReader != null) { sharedReader.release(); sharedReader = null; } } //-------------------------< properties >----------------------------------- /** * The lucene index writer property: useCompountFile */ void setUseCompoundFile(boolean b) { useCompoundFile = b; if (indexWriter != null) { IndexWriterConfig config = indexWriter.getConfig(); if (config.getMergePolicy() instanceof LogMergePolicy) { ((LogMergePolicy)config.getMergePolicy()).setUseCompoundFile(useCompoundFile); ((LogMergePolicy)config.getMergePolicy()).setNoCFSRatio(1.0); } else if (config.getMergePolicy() instanceof TieredMergePolicy) { ((TieredMergePolicy)config.getMergePolicy()).setUseCompoundFile(useCompoundFile); ((TieredMergePolicy)config.getMergePolicy()).setNoCFSRatio(1.0); } else { log.error("Can't set \"UseCompoundFile\". Merge policy is not an instance of LogMergePolicy. "); } } } /** * The lucene index writer property: maxFieldLength */ void setMaxFieldLength(int maxFieldLength) { this.maxFieldLength = maxFieldLength; if (indexWriter != null) { indexWriter.setMaxFieldLength(this.maxFieldLength); } } /** * @return the current value for termInfosIndexDivisor. */ public int getTermInfosIndexDivisor() { return termInfosIndexDivisor; } /** * Sets a new value for termInfosIndexDivisor. * * @param termInfosIndexDivisor the new value. */ public void setTermInfosIndexDivisor(int termInfosIndexDivisor) { this.termInfosIndexDivisor = termInfosIndexDivisor; } //------------------------------< internal >-------------------------------- /** * Adapter to pipe info messages from lucene into log messages. */ private static final class LoggingPrintStream extends PrintStream { /** Buffer print calls until a newline is written */ private StringBuffer buffer = new StringBuffer(); public LoggingPrintStream() { super(new OutputStream() { @Override public void write(int b) { // do nothing } }); } @Override public void print(String s) { buffer.append(s); } @Override public void println(String s) { buffer.append(s); log.debug(buffer.toString()); buffer.setLength(0); } } }