/*
 * Copyright 2014, Stratio.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.stratio.cassandra.index.service;

import com.stratio.cassandra.util.Log;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
import org.apache.lucene.index.sorter.SortingMergePolicy;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

/**
 * Class wrapping a Lucene directory and its readers, writers and searchers for NRT.
 *
 * @author Andres de la Pena <adelapena@stratio.com>
 */
public class LuceneIndex {

    private final RowMapper rowMapper;
    private final String path;
    private final Double refreshSeconds;
    private final Integer ramBufferMB;
    private final Integer maxMergeMB;
    private final Integer maxCachedMB;
    private final Analyzer analyzer;

    private File file;
    private Directory directory;
    private IndexWriter indexWriter;
    private SearcherManager searcherManager;
    private ControlledRealTimeReopenThread<IndexSearcher> searcherReopener;
    private Sort sort;

    static {
        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    }

    /**
     * Builds a new {@code LuceneIndex} using the specified directory path and analyzer.
     *
     * @param rowMapper      A {@link RowMapper}.
     * @param path           The path of the directory where the Lucene files will be stored.
     * @param refreshSeconds The index readers refresh time in seconds. Writes are not guaranteed to be visible
     *                       until this time has passed.
     * @param ramBufferMB    The index writer buffer size in MB.
     * @param maxMergeMB     The {@link NRTCachingDirectory} max merge size in MB.
     * @param maxCachedMB    The {@link NRTCachingDirectory} max cached size in MB.
     * @param analyzer       The default {@link Analyzer}.
     */
    public LuceneIndex(RowMapper rowMapper,
                       String path,
                       Double refreshSeconds,
                       Integer ramBufferMB,
                       Integer maxMergeMB,
                       Integer maxCachedMB,
                       Analyzer analyzer) {
        this.rowMapper = rowMapper;
        this.path = path;
        this.refreshSeconds = refreshSeconds;
        this.ramBufferMB = ramBufferMB;
        this.maxMergeMB = maxMergeMB;
        this.maxCachedMB = maxCachedMB;
        this.analyzer = analyzer;
    }
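
    // Illustrative construction sketch: the literal values below are examples,
    // not defaults taken from this codebase, and `mapper`, `analyzer` and `sort`
    // are assumed to be built by the caller.
    //
    //   LuceneIndex index = new LuceneIndex(mapper,
    //                                       "/var/lib/cassandra/data/lucene",
    //                                       0.1D, // refreshSeconds
    //                                       64,   // ramBufferMB
    //                                       5,    // maxMergeMB
    //                                       30,   // maxCachedMB
    //                                       analyzer);
    //   index.init(sort); // keeps merged segments ordered by `sort`
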
    /**
     * Initializes this index using the specified {@link Sort} for trying to keep the {@link Document}s sorted.
     *
     * @param sort The {@link Sort} to be used.
     */
    public void init(Sort sort) {
        Log.debug("Initializing index");
        try {
            this.sort = sort;

            // Get directory file
            file = new File(path);

            // Open or create directory
            FSDirectory fsDirectory = FSDirectory.open(file);
            directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

            // Setup index writer
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer);
            config.setRAMBufferSizeMB(ramBufferMB);
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            config.setUseCompoundFile(true);
            config.setMergePolicy(new SortingMergePolicy(config.getMergePolicy(), sort));
            indexWriter = new IndexWriter(directory, config);

            // Setup NRT search
            SearcherFactory searcherFactory = new SearcherFactory() {
                @Override
                public IndexSearcher newSearcher(IndexReader reader) throws IOException {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    searcher.setSimilarity(new NoIDFSimilarity());
                    return searcher;
                }
            };
            TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter);
            searcherManager = new SearcherManager(indexWriter, true, searcherFactory);
            searcherReopener = new ControlledRealTimeReopenThread<>(trackingIndexWriter,
                                                                    searcherManager,
                                                                    refreshSeconds,
                                                                    refreshSeconds);
            searcherReopener.start(); // Start the refresher thread
        } catch (IOException e) {
            Log.error(e, "Error while initializing index");
            throw new RuntimeException(e);
        }
    }

    /**
     * Updates the specified {@link Document} by first deleting the documents containing the specified {@link Term}
     * and then adding the new document. The delete and the add are atomic as seen by a reader on the same index
     * (a flush may happen only after the add).
     *
     * @param term     The {@link Term} to identify the document(s) to be deleted.
     * @param document The {@link Document} to be added.
     */
    public void upsert(Term term, Document document) {
        Log.debug("Updating document %s with term %s", document, term);
        try {
            indexWriter.updateDocument(term, document);
        } catch (IOException e) {
            Log.error(e, "Error while updating document %s with term %s", document, term);
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes all the {@link Document}s containing the specified {@link Term}.
     *
     * @param term The {@link Term} to identify the documents to be deleted.
     */
    public void delete(Term term) {
        Log.debug("Deleting by term %s", term);
        try {
            indexWriter.deleteDocuments(term);
        } catch (IOException e) {
            Log.error(e, "Error while deleting by term %s", term);
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes all the {@link Document}s satisfying the specified {@link Query}.
     *
     * @param query The {@link Query} to identify the documents to be deleted.
     */
    public void delete(Query query) {
        Log.debug("Deleting by query %s", query);
        try {
            indexWriter.deleteDocuments(query);
        } catch (IOException e) {
            Log.error(e, "Error while deleting by query %s", query);
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes all the {@link Document}s.
     */
    public void truncate() {
        Log.info("Truncating index");
        try {
            indexWriter.deleteAll();
        } catch (IOException e) {
            Log.error(e, "Error while truncating index");
            throw new RuntimeException(e);
        }
    }

    /**
     * Commits the pending changes.
     */
    public void commit() {
        Log.info("Committing");
        try {
            indexWriter.commit();
        } catch (IOException e) {
            Log.error(e, "Error while committing");
            throw new RuntimeException(e);
        }
    }
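
    // Illustrative write-path sketch: the field name "_id" and the key value are
    // placeholders, not identifiers from this codebase. NRT readers will see the
    // upsert within refreshSeconds even before an explicit commit.
    //
    //   Term term = new Term("_id", "key1");
    //   Document document = new Document();
    //   document.add(new StringField("_id", "key1", Field.Store.YES));
    //   index.upsert(term, document); // atomic delete-then-add
    //   index.commit();               // persist pending changes
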
    /**
     * Commits all changes to the index, waits for pending merges to complete, and closes all associated resources.
     */
    public void close() {
        Log.info("Closing index");
        try {
            searcherReopener.interrupt();
            searcherManager.close();
            indexWriter.close();
            directory.close();
        } catch (IOException e) {
            Log.error(e, "Error while closing index");
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the index and removes all its files.
     */
    public void delete() {
        Log.info("Removing index");
        close();
        FileUtils.deleteRecursive(file);
    }

    /**
     * Finds the top {@code count} hits for {@code query} starting after {@code after}, sorted by the criteria in
     * {@code sort}.
     *
     * @param query         The {@link Query} to search for.
     * @param sort          The {@link Sort} to be applied.
     * @param after         The starting {@link SearchResult}.
     * @param count         Return only the top {@code count} results.
     * @param fieldsToLoad  The names of the fields to be loaded.
     * @param usesRelevance If the results must be sorted by relevance.
     * @return The found documents, sorted according to the supplied {@link Sort} instance.
     */
    public List<SearchResult> search(Query query,
                                     Sort sort,
                                     SearchResult after,
                                     Integer count,
                                     Set<String> fieldsToLoad,
                                     boolean usesRelevance) {
        Log.debug("Searching by query %s", query);
        try {
            IndexSearcher searcher = searcherManager.acquire();
            try {
                // Search
                ScoreDoc start = after == null ? null : after.getScoreDoc();
                TopDocs topDocs = topDocs(searcher, query, sort, start, count, usesRelevance);
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;

                // Collect the documents from the query result
                List<SearchResult> searchResults = new ArrayList<>(scoreDocs.length);
                for (ScoreDoc scoreDoc : scoreDocs) {
                    Document document = searcher.doc(scoreDoc.doc, fieldsToLoad);
                    SearchResult searchResult = rowMapper.searchResult(document, scoreDoc);
                    searchResults.add(searchResult);
                }
                return searchResults;
            } finally {
                searcherManager.release(searcher);
            }
        } catch (IOException e) {
            Log.error(e, "Error while searching by query %s", query);
            throw new RuntimeException(e);
        }
    }

    private TopDocs topDocs(IndexSearcher searcher,
                            Query query,
                            Sort sort,
                            ScoreDoc after,
                            int count,
                            boolean usesRelevance) throws IOException {
        if (sort == null) {
            if (!usesRelevance) {
                // No explicit sort and no relevance: collect in the index natural order,
                // terminating early because segments are kept sorted by this.sort
                FieldDoc start = after == null ? null : (FieldDoc) after;
                TopFieldCollector tfc = TopFieldCollector.create(this.sort, count, start, true, false, false, false);
                Collector collector = new EarlyTerminatingSortingCollector(tfc, this.sort, count);
                searcher.search(query, collector);
                return tfc.topDocs();
            } else {
                // Sort by relevance
                return searcher.searchAfter(after, query, count);
            }
        } else {
            // Sort by the specified fields
            return searcher.searchAfter(after, query, count, sort);
        }
    }

    /**
     * Optimizes the index by forcing the merge of all segments into a single one. This operation blocks until all
     * merging completes.
     */
    public void optimize() {
        Log.debug("Optimizing index");
        try {
            indexWriter.forceMerge(1, true);
            indexWriter.commit();
        } catch (IOException e) {
            Log.error(e, "Error while optimizing index");
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the total number of {@link Document}s in this index.
     *
     * @return The total number of {@link Document}s in this index.
     */
    public long getNumDocs() {
        Log.debug("Getting num docs");
        try {
            IndexSearcher searcher = searcherManager.acquire();
            try {
                return searcher.getIndexReader().numDocs();
            } finally {
                searcherManager.release(searcher);
            }
        } catch (IOException e) {
            Log.error(e, "Error while getting num docs");
            throw new RuntimeException(e);
        }
    }
}
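
// Illustrative read-path sketch: the field names, the `fields` set and the
// `index` instance are placeholders, not identifiers from this codebase.
// Pagination works by passing the last SearchResult of a page as `after`.
//
//   Set<String> fields = Collections.singleton("_id");
//   Query query = new TermQuery(new Term("name", "value"));
//   List<SearchResult> page = index.search(query, null, null, 100, fields, false);
//   SearchResult last = page.isEmpty() ? null : page.get(page.size() - 1);
//   List<SearchResult> next = index.search(query, null, last, 100, fields, false);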