/*
 * Copyright 2014, Stratio.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.stratio.cassandra.index.service;

import com.stratio.cassandra.util.Log;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
import org.apache.lucene.index.sorter.SortingMergePolicy;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

/**
 * Class wrapping a Lucene directory and its readers, writers and searchers for NRT.
 *
 * @author Andres de la Pena <adelapena@stratio.com>
 */
public class LuceneIndex {

    private final RowMapper rowMapper;
    private final String path;
    private final Double refreshSeconds;
    private final Integer ramBufferMB;
    private final Integer maxMergeMB;
    private final Integer maxCachedMB;
    private final Analyzer analyzer;

    private File file;
    private Directory directory;
    private IndexWriter indexWriter;
    private SearcherManager searcherManager;
    private ControlledRealTimeReopenThread<IndexSearcher> searcherReopener;
    private Sort sort;

    static {
        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    }

    /**
     * Builds a new {@code LuceneIndex} using the specified directory path and analyzer.
     *
     * @param rowMapper      A {@link RowMapper}.
     * @param path           The path of the directory where the Lucene files will be stored.
     * @param refreshSeconds The index readers refresh time in seconds. Writes are not guaranteed to be visible
     *                       until this time has passed.
     * @param ramBufferMB    The index writer buffer size in MB.
     * @param maxMergeMB     The {@link NRTCachingDirectory} max merge size in MB.
     * @param maxCachedMB    The {@link NRTCachingDirectory} max cached size in MB.
     * @param analyzer       The default {@link Analyzer}.
     */
    public LuceneIndex(RowMapper rowMapper,
                       String path,
                       Double refreshSeconds,
                       Integer ramBufferMB,
                       Integer maxMergeMB,
                       Integer maxCachedMB,
                       Analyzer analyzer) {
        this.rowMapper = rowMapper;
        this.path = path;
        this.refreshSeconds = refreshSeconds;
        this.ramBufferMB = ramBufferMB;
        this.maxMergeMB = maxMergeMB;
        this.maxCachedMB = maxCachedMB;
        this.analyzer = analyzer;
    }
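
    // Illustrative construction sketch: the literal values below are examples,
    // not defaults taken from this codebase, and `mapper`, `analyzer` and `sort`
    // are assumed to be built by the caller.
    //
    //   LuceneIndex index = new LuceneIndex(mapper,
    //                                       "/var/lib/cassandra/data/lucene",
    //                                       0.1D, // refreshSeconds
    //                                       64,   // ramBufferMB
    //                                       5,    // maxMergeMB
    //                                       30,   // maxCachedMB
    //                                       analyzer);
    //   index.init(sort); // keeps merged segments ordered by `sort`
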
    /**
     * Initializes this index using the specified {@link Sort} for trying to keep the {@link Document}s sorted.
     *
     * @param sort The {@link Sort} to be used.
     */
    public void init(Sort sort) {
        Log.debug("Initializing index");
        try {
            this.sort = sort;

            // Get directory file
            file = new File(path);

            // Open or create directory
            FSDirectory fsDirectory = FSDirectory.open(file);
            directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

            // Setup index writer
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer);
            config.setRAMBufferSizeMB(ramBufferMB);
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            config.setUseCompoundFile(true);
            config.setMergePolicy(new SortingMergePolicy(config.getMergePolicy(), sort));
            indexWriter = new IndexWriter(directory, config);

            // Setup NRT search
            SearcherFactory searcherFactory = new SearcherFactory() {
                @Override
                public IndexSearcher newSearcher(IndexReader reader) throws IOException {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    searcher.setSimilarity(new NoIDFSimilarity());
                    return searcher;
                }
            };
            TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter);
            searcherManager = new SearcherManager(indexWriter, true, searcherFactory);
            searcherReopener = new ControlledRealTimeReopenThread<>(trackingIndexWriter,
                                                                    searcherManager,
                                                                    refreshSeconds,
                                                                    refreshSeconds);
            searcherReopener.start(); // Start the refresher thread
        } catch (IOException e) {
            Log.error(e, "Error while initializing index");
            throw new RuntimeException(e);
        }
    }

    /**
     * Updates the specified {@link Document} by first deleting the documents containing the specified {@link Term}
     * and then adding the new document. The delete and the add are atomic as seen by a reader on the same index
     * (a flush may happen only after the add).
     *
     * @param term     The {@link Term} to identify the document(s) to be deleted.
     * @param document The {@link Document} to be added.
     */
    public void upsert(Term term, Document document) {
        Log.debug("Updating document %s with term %s", document, term);
        try {
            indexWriter.updateDocument(term, document);
        } catch (IOException e) {
            Log.error(e, "Error while updating document %s with term %s", document, term);
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes all the {@link Document}s containing the specified {@link Term}.
     *
     * @param term The {@link Term} to identify the documents to be deleted.
     */
    public void delete(Term term) {
        Log.debug("Deleting by term %s", term);
        try {
            indexWriter.deleteDocuments(term);
        } catch (IOException e) {
            Log.error(e, "Error while deleting by term %s", term);
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes all the {@link Document}s satisfying the specified {@link Query}.
     *
     * @param query The {@link Query} to identify the documents to be deleted.
     */
    public void delete(Query query) {
        Log.debug("Deleting by query %s", query);
        try {
            indexWriter.deleteDocuments(query);
        } catch (IOException e) {
            Log.error(e, "Error while deleting by query %s", query);
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes all the {@link Document}s.
     */
    public void truncate() {
        Log.info("Truncating index");
        try {
            indexWriter.deleteAll();
        } catch (IOException e) {
            Log.error(e, "Error while truncating index");
            throw new RuntimeException(e);
        }
    }

    /**
     * Commits the pending changes.
     */
    public void commit() {
        Log.info("Committing");
        try {
            indexWriter.commit();
        } catch (IOException e) {
            Log.error(e, "Error while committing");
            throw new RuntimeException(e);
        }
    }
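
    // Illustrative write-path sketch: the field name "_id" and the key value are
    // placeholders, not identifiers from this codebase. NRT readers will see the
    // upsert within refreshSeconds even before an explicit commit.
    //
    //   Term term = new Term("_id", "key1");
    //   Document document = new Document();
    //   document.add(new StringField("_id", "key1", Field.Store.YES));
    //   index.upsert(term, document); // atomic delete-then-add
    //   index.commit();               // persist pending changes
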
    /**
     * Commits all changes to the index, waits for pending merges to complete, and closes all associated resources.
     */
    public void close() {
        Log.info("Closing index");
        try {
            searcherReopener.interrupt();
            searcherManager.close();
            indexWriter.close();
            directory.close();
        } catch (IOException e) {
            Log.error(e, "Error while closing index");
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the index and removes all its files.
     */
    public void delete() {
        Log.info("Removing index");
        close();
        FileUtils.deleteRecursive(file);
    }

    /**
     * Finds the top {@code count} hits for {@code query} starting after {@code after}, sorted by the criteria in
     * {@code sort}.
     *
     * @param query         The {@link Query} to search for.
     * @param sort          The {@link Sort} to be applied.
     * @param after         The starting {@link SearchResult}.
     * @param count         Return only the top {@code count} results.
     * @param fieldsToLoad  The names of the fields to be loaded.
     * @param usesRelevance If the results must be sorted by relevance.
     * @return The found documents, sorted according to the supplied {@link Sort} instance.
     */
    public List<SearchResult> search(Query query,
                                     Sort sort,
                                     SearchResult after,
                                     Integer count,
                                     Set<String> fieldsToLoad,
                                     boolean usesRelevance) {
        Log.debug("Searching by query %s", query);
        try {
            IndexSearcher searcher = searcherManager.acquire();
            try {
                // Search
                ScoreDoc start = after == null ? null : after.getScoreDoc();
                TopDocs topDocs = topDocs(searcher, query, sort, start, count, usesRelevance);
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;

                // Collect the documents from the query result
                List<SearchResult> searchResults = new ArrayList<>(scoreDocs.length);
                for (ScoreDoc scoreDoc : scoreDocs) {
                    Document document = searcher.doc(scoreDoc.doc, fieldsToLoad);
                    SearchResult searchResult = rowMapper.searchResult(document, scoreDoc);
                    searchResults.add(searchResult);
                }
                return searchResults;
            } finally {
                searcherManager.release(searcher);
            }
        } catch (IOException e) {
            Log.error(e, "Error while searching by query %s", query);
            throw new RuntimeException(e);
        }
    }

    private TopDocs topDocs(IndexSearcher searcher,
                            Query query,
                            Sort sort,
                            ScoreDoc after,
                            int count,
                            boolean usesRelevance) throws IOException {
        if (sort == null) {
            if (!usesRelevance) {
                // No explicit sort and no relevance: collect in the index natural order,
                // terminating early because segments are kept sorted by this.sort
                FieldDoc start = after == null ? null : (FieldDoc) after;
                TopFieldCollector tfc = TopFieldCollector.create(this.sort, count, start, true, false, false, false);
                Collector collector = new EarlyTerminatingSortingCollector(tfc, this.sort, count);
                searcher.search(query, collector);
                return tfc.topDocs();
            } else {
                // Sort by relevance
                return searcher.searchAfter(after, query, count);
            }
        } else {
            // Sort by the specified fields
            return searcher.searchAfter(after, query, count, sort);
        }
    }

    /**
     * Optimizes the index by forcing the merge of all segments into a single one. This operation blocks until all
     * merging completes.
     */
    public void optimize() {
        Log.debug("Optimizing index");
        try {
            indexWriter.forceMerge(1, true);
            indexWriter.commit();
        } catch (IOException e) {
            Log.error(e, "Error while optimizing index");
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the total number of {@link Document}s in this index.
     *
     * @return The total number of {@link Document}s in this index.
     */
    public long getNumDocs() {
        Log.debug("Getting num docs");
        try {
            IndexSearcher searcher = searcherManager.acquire();
            try {
                return searcher.getIndexReader().numDocs();
            } finally {
                searcherManager.release(searcher);
            }
        } catch (IOException e) {
            Log.error(e, "Error while getting num docs");
            throw new RuntimeException(e);
        }
    }
}
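
// Illustrative read-path sketch: the field names, the `fields` set and the
// `index` instance are placeholders, not identifiers from this codebase.
// Pagination works by passing the last SearchResult of a page as `after`.
//
//   Set<String> fields = Collections.singleton("_id");
//   Query query = new TermQuery(new Term("name", "value"));
//   List<SearchResult> page = index.search(query, null, null, 100, fields, false);
//   SearchResult last = page.isEmpty() ? null : page.get(page.size() - 1);
//   List<SearchResult> next = index.search(query, null, last, 100, fields, false);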