/* * Licensed under the Apache License, Version 2.0 (the "License"); * * You may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * limitations under the License. * * Contributions from 2013-2017 were performed either by US government * employees, or under US Veterans Health Administration contracts. * * US Veterans Health Administration contributions by government employees * are work of the U.S. Government and are not subject to copyright * protection in the United States. Portions contributed by government * employees are USGovWork (17USC §105). Not subject to copyright. * * Contribution by contractors to the US Veterans Health Administration * during this period are contractually contributed under the * Apache License, Version 2.0. * * See: https://www.usa.gov/government-works * * Contributions prior to 2013: * * Copyright (C) International Health Terminology Standards Development Organisation. * Licensed under the Apache License, Version 2.0. * */ package sh.isaac.provider.query.lucene; //~--- JDK imports ------------------------------------------------------------ /** * Copyright Notice * * This is a work of the U.S. Government and is not subject to copyright * protection in the United States. Foreign copyrights may apply. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.File; import java.io.IOException; import java.io.StringReader; import java.nio.file.Path; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BooleanSupplier; import java.util.function.Predicate; import java.util.function.Supplier; import javax.annotation.PostConstruct; import javax.annotation.PreDestroy; //~--- non-JDK imports -------------------------------------------------------- import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.IntField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.index.TrackingIndexWriter; 
import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ControlledRealTimeReopenThread; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ReferenceManager; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.util.Version; import sh.isaac.api.ConfigurationService; import sh.isaac.api.Get; import sh.isaac.api.LookupService; import sh.isaac.api.SystemStatusService; import sh.isaac.api.chronicle.ObjectChronology; import sh.isaac.api.commit.ChronologyChangeListener; import sh.isaac.api.commit.CommitRecord; import sh.isaac.api.component.concept.ConceptChronology; import sh.isaac.api.component.sememe.SememeChronology; import sh.isaac.api.component.sememe.version.SememeVersion; import sh.isaac.api.identity.StampedVersion; import sh.isaac.api.index.ComponentSearchResult; import sh.isaac.api.index.ConceptSearchResult; import sh.isaac.api.index.IndexServiceBI; import sh.isaac.api.index.IndexedGenerationCallable; import sh.isaac.api.index.SearchResult; import sh.isaac.api.util.NamedThreadFactory; import sh.isaac.api.util.UuidT5Generator; import sh.isaac.api.util.WorkExecutors; import sh.isaac.provider.query.lucene.indexers.DescriptionIndexer; import sh.isaac.provider.query.lucene.indexers.SememeIndexer; import sh.isaac.provider.query.lucene.indexers.TopDocsFilteredCollector; import sh.isaac.utility.Frills; //~--- classes ---------------------------------------------------------------- //See example for help with the Controlled Real-time 
// indexing...

/**
 * The Class LuceneIndexer.
 *
 * Abstract base for the Lucene-backed index services.  Concrete subclasses supply the
 * per-chronicle document fields via {@link #addFields} and decide which chronicles get
 * indexed via {@link #indexChronicle}.
 */
//http://stackoverflow.com/questions/17993960/lucene-4-4-0-new-controlledrealtimereopenthread-sample-usage?answertab=votes#tab-top
public abstract class LuceneIndexer
         implements IndexServiceBI {
   /** The Constant DEFAULT_LUCENE_FOLDER - name of the folder (under the search folder) that holds all lucene indexes. */
   public static final String DEFAULT_LUCENE_FOLDER = "lucene";

   /** The Constant log. */
   private static final Logger log = LogManager.getLogger();

   /** The Constant luceneVersion - the lucene index format version this code reads and writes. */
   public static final Version luceneVersion = Version.LUCENE_4_10_3;

   /** The Constant unindexedFuture - shared, already-completed future returned for content that is not indexed. */
   private static final UnindexedFuture unindexedFuture = new UnindexedFuture();

   // don't need to analyze this - and even though it is an integer, we index it as a string, as that is faster when we are only doing

   /** The Constant FIELD_SEMEME_ASSEMBLAGE_SEQUENCE. */
   // exact matches.
   protected static final String FIELD_SEMEME_ASSEMBLAGE_SEQUENCE = "_sememe_type_sequence_"
                                                                    + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER;

   /** The Constant FIELD_COMPONENT_NID. */
   // this isn't indexed
   public static final String FIELD_COMPONENT_NID = "_component_nid_";

   /** The Constant FIELD_TYPE_INT_STORED_NOT_INDEXED - field type for ints stored for retrieval but never searched on. */
   protected static final FieldType FIELD_TYPE_INT_STORED_NOT_INDEXED;

   //~--- static initializers -------------------------------------------------

   static {
      FIELD_TYPE_INT_STORED_NOT_INDEXED = new FieldType();
      FIELD_TYPE_INT_STORED_NOT_INDEXED.setNumericType(FieldType.NumericType.INT);
      FIELD_TYPE_INT_STORED_NOT_INDEXED.setIndexed(false);
      FIELD_TYPE_INT_STORED_NOT_INDEXED.setStored(true);
      FIELD_TYPE_INT_STORED_NOT_INDEXED.setTokenized(false);
      FIELD_TYPE_INT_STORED_NOT_INDEXED.freeze();
   }

   //~--- fields --------------------------------------------------------------

   /** The index folder. */
   private File indexFolder = null;

   /** The indexed component statistics - per item-type counts, see {@link #incrementIndexedItemCount(String)}. */
   private final HashMap<String, AtomicInteger> indexedComponentStatistics = new HashMap<>();

   /** The indexed component statistics block - gates first-time creation of a statistics entry. */
   private final Semaphore indexedComponentStatisticsBlock = new Semaphore(1);

   /** The component nid latch - latches waiting for a particular component's document to reach the index. */
   private final ConcurrentHashMap<Integer, IndexedGenerationCallable> componentNidLatch = new ConcurrentHashMap<>();

   /** The enabled flag - when false, index(...) requests are ignored. */
   private boolean enabled = true;

   /** The db build mode - lazily read from ConfigurationService on the first commit notification. */
   private Boolean dbBuildMode = null;

   /** The database validity. */
   private DatabaseValidity databaseValidity = DatabaseValidity.NOT_SET;

   /** The change listener ref - kept so the commit-service listener is strongly referenced. */
   private ChronologyChangeListener changeListenerRef;

   /** The lucene writer service. */
   protected final ExecutorService luceneWriterService;

   /** The lucene writer future checker service. */
   protected ExecutorService luceneWriterFutureCheckerService;

   /** The reopen thread - periodically refreshes the near-real-time searcher. */
   private final ControlledRealTimeReopenThread<IndexSearcher> reopenThread;

   /** The tracking index writer. */
   private final TrackingIndexWriter trackingIndexWriter;

   /** The searcher manager. */
   private final ReferenceManager<IndexSearcher> searcherManager;

   /** The index name. */
   private final String indexName;

   //~--- constructors --------------------------------------------------------

   /**
    * Instantiates a new lucene indexer.
    *
    * @param indexName the index name
    * @throws IOException Signals that an I/O exception has occurred.
*/
   protected LuceneIndexer(String indexName) throws IOException {
      try {
         this.indexName           = indexName;
         this.luceneWriterService = LookupService.getService(WorkExecutors.class)
               .getIOExecutor();

         // Single thread, so futures are checked (and any exceptions logged) in submission order.
         this.luceneWriterFutureCheckerService = Executors.newFixedThreadPool(1,
               new NamedThreadFactory(indexName + " Lucene future checker", false));

         final Path searchFolder = LookupService.getService(ConfigurationService.class)
               .getSearchFolderPath();
         final File luceneRootFolder = new File(searchFolder.toFile(), DEFAULT_LUCENE_FOLDER);

         luceneRootFolder.mkdirs();
         this.indexFolder = new File(luceneRootFolder, indexName);

         // Record whether we are starting against an empty or a pre-existing index folder.
         if (!this.indexFolder.exists()) {
            this.databaseValidity = DatabaseValidity.MISSING_DIRECTORY;
            log.info("Index folder missing: " + this.indexFolder.getAbsolutePath());
         } else if (this.indexFolder.list().length > 0) {
            this.databaseValidity = DatabaseValidity.POPULATED_DIRECTORY;
         }

         this.indexFolder.mkdirs();
         log.info("Index: " + this.indexFolder.getAbsolutePath());

         final Directory indexDirectory = new MMapDirectory(this.indexFolder);

         // switch over to MMapDirectory - in theory - this gives us back some
         // room on the JDK stack, letting the OS directly manage the caching of the index files - and more importantly, gives us a huge
         // performance boost during any operation that tries to do multi-threaded reads of the index (like the SOLOR rules processing) because
         // the default value of SimpleFSDirectory is a huge bottleneck.
         indexDirectory.clearLock("write.lock");

         final IndexWriterConfig config = new IndexWriterConfig(luceneVersion, new PerFieldAnalyzer());

         config.setRAMBufferSizeMB(256);

         final MergePolicy mergePolicy = new LogByteSizeMergePolicy();

         config.setMergePolicy(mergePolicy);
         config.setSimilarity(new ShortTextSimilarity());

         final IndexWriter indexWriter = new IndexWriter(indexDirectory, config);

         this.trackingIndexWriter = new TrackingIndexWriter(indexWriter);

         final boolean applyAllDeletes = false;

         this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, null);

         // [3]: Create the ControlledRealTimeReopenThread that reopens the index periodically taking into
         // account the changes made to the index and tracked by the TrackingIndexWriter instance
         // The index is refreshed every 60sc when nobody is waiting
         // and every 100 millis whenever is someone waiting (see search method)
         // (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html)
         this.reopenThread = new ControlledRealTimeReopenThread<>(this.trackingIndexWriter,
               this.searcherManager,
               60.00,
               0.1);
         this.startThread();

         // Register for commits:
         log.info("Registering indexer " + getIndexerName() + " for commits");
         this.changeListenerRef = new ChronologyChangeListener() {
            // Re-indexes every sememe in each commit; lazily latches dbBuildMode on the first commit seen.
            @Override
            public void handleCommit(CommitRecord commitRecord) {
               if (LuceneIndexer.this.dbBuildMode == null) {
                  LuceneIndexer.this.dbBuildMode = Get.configurationService()
                        .inDBBuildMode();
               }

               if (LuceneIndexer.this.dbBuildMode) {
                  log.debug("Ignore commit due to db build mode");
                  return;
               }

               final int size = commitRecord.getSememesInCommit()
                                            .size();

               // Only enumerate the individual sememe ids in the log for small commits.
               if (size < 100) {
                  log.info("submitting sememes " + commitRecord.getSememesInCommit().toString() + " to indexer "
                           + getIndexerName() + " due to commit");
               } else {
                  log.info("submitting " + size + " sememes to indexer " + getIndexerName() + " due to commit");
               }

               commitRecord.getSememesInCommit().stream().forEach(sememeId -> {
                  final SememeChronology<?> sc = Get.sememeService()
                                                    .getSememe(sememeId);

                  index(sc);
               });
            }

            @Override
            public void handleChange(SememeChronology<? extends SememeVersion<?>> sc) {
               // noop
            }

            @Override
            public void handleChange(ConceptChronology<? extends StampedVersion> cc) {
               // noop
            }

            @Override
            public UUID getListenerUuid() {
               // Deterministic UUID derived from the indexer name - stable listener identity across restarts.
               return UuidT5Generator.get(getIndexerName());
            }
         };
         Get.commitService()
            .addChangeListener(this.changeListenerRef);
      } catch (final Exception e) {
         // Report the configuration failure to the system status service, then rethrow.
         LookupService.getService(SystemStatusService.class)
                      .notifyServiceConfigurationFailure(indexName, e);
         throw e;
      }
   }

   //~--- methods -------------------------------------------------------------

   /**
    * Clear database validity value.
    */
   @Override
   public void clearDatabaseValidityValue() {
      // Reset to enforce analysis
      this.databaseValidity = DatabaseValidity.NOT_SET;
   }

   /**
    * Clear index.  Deletes every document; the deletion becomes visible on the next commit/refresh.
    */
   @Override
   public final void clearIndex() {
      try {
         this.trackingIndexWriter.deleteAll();
      } catch (final IOException ex) {
         throw new RuntimeException(ex);
      }
   }

   /**
    * Clear indexed statistics.
    */
   @Override
   public void clearIndexedStatistics() {
      this.indexedComponentStatistics.clear();
   }

   /**
    * Close writer.  Stops the reopen thread, drains pending write futures, then closes the IndexWriter.
    */
   @Override
   public final void closeWriter() {
      try {
         this.reopenThread.close();

         // We don't shutdown the writer service we are using, because it is the core isaac thread pool.
         // waiting for the future checker service is sufficient to ensure that all write operations are complete.
         this.luceneWriterFutureCheckerService.shutdown();
         this.luceneWriterFutureCheckerService.awaitTermination(15, TimeUnit.MINUTES);
         this.trackingIndexWriter.getIndexWriter()
                                 .close();
      } catch (IOException | InterruptedException ex) {
         // NOTE(review): InterruptedException is wrapped without restoring the interrupt status
         // (Thread.currentThread().interrupt()) - the caller's interrupt flag is lost.
         throw new RuntimeException(ex);
      }
   }

   /**
    * Commit writer.  Commits pending changes and blocks until the searcher can see them.
    */
   @Override
   public final void commitWriter() {
      try {
         this.trackingIndexWriter.getIndexWriter()
                                 .commit();
         this.searcherManager.maybeRefreshBlocking();
      } catch (final IOException ex) {
         throw new RuntimeException(ex);
      }
   }

   /**
    * Force merge.
*/ @Override public void forceMerge() { try { this.trackingIndexWriter.getIndexWriter() .forceMerge(1); this.searcherManager.maybeRefreshBlocking(); } catch (final IOException ex) { throw new RuntimeException(ex); } } /** * Index. * * @param chronicle the chronicle * @return the future */ @Override public final Future<Long> index(ObjectChronology<?> chronicle) { return index((() -> new AddDocument(chronicle)), (() -> indexChronicle(chronicle)), chronicle.getNid()); } /** * Merge results on concept. * * @param searchResult the search result * @return the list */ @Override public List<ConceptSearchResult> mergeResultsOnConcept(List<SearchResult> searchResult) { final HashMap<Integer, ConceptSearchResult> merged = new HashMap<>(); final List<ConceptSearchResult> result = new ArrayList<>(); for (final SearchResult sr: searchResult) { final int conSequence = Frills.findConcept(sr.getNid()); if (conSequence < 0) { log.error("Failed to find a concept that references nid " + sr.getNid()); } else if (merged.containsKey(conSequence)) { merged.get(conSequence) .merge(sr); } else { final ConceptSearchResult csr = new ConceptSearchResult(conSequence, sr.getNid(), sr.getScore()); merged.put(conSequence, csr); result.add(csr); } } return result; } /** * Query index with no specified target generation of the index. * * Calls {@link #query(String, Integer, int, long)} with the semeneConceptSequence set to null and * the targetGeneration field set to Long.MIN_VALUE * * @param query The query to apply. * @param sizeLimit The maximum size of the result list. * @return a List of {@code SearchResult} that contains the nid of the * component that matched, and the score of that match relative to other matches. */ @Override public final List<SearchResult> query(String query, int sizeLimit) { return query(query, null, sizeLimit, Long.MIN_VALUE); } /** * * Calls {@link #query(String, boolean, Integer, int, long)} with the prefixSearch field set to false. * * @param query The query to apply. 
* @param semeneConceptSequence optional - The concept seqeuence of the sememe that you wish to search within. If null, * searches all indexed content. This would be set to the concept sequence of {@link MetaData#ENGLISH_DESCRIPTION_ASSEMBLAGE} * or the concept sequence {@link MetaData#SCTID} for example. * @param sizeLimit The maximum size of the result list. * @param targetGeneration target generation that must be included in the search or Long.MIN_VALUE if there is no * need to wait for a target generation. Long.MAX_VALUE can be passed in to force this query to wait until any * in-progress indexing operations are completed - and then use the latest index. * @return a List of {@code SearchResult} that contains the nid of the component that matched, and the score of * that match relative to other matches. */ @Override public final List<SearchResult> query(String query, Integer[] semeneConceptSequence, int sizeLimit, Long targetGeneration) { return query(query, false, semeneConceptSequence, sizeLimit, targetGeneration); } /** * A generic query API that handles most common cases. The cases handled for various component property types * are detailed below. * * NOTE - subclasses of LuceneIndexer may have other query(...) methods that allow for more specific and or complex * queries. Specifically both {@link SememeIndexer} and {@link DescriptionIndexer} have their own * query(...) methods which allow for more advanced queries. * * @param query The query to apply. * @param prefixSearch if true, utilize a search algorithm that is optimized for prefix searching, such as the searching * that would be done to implement a type-ahead style search. Does not use the Lucene Query parser. Every term (or token) * that is part of the query string will be required to be found in the result. * * Note, it is useful to NOT trim the text of the query before it is sent in - if the last word of the query has a * space character following it, that word will be required as a complete term. 
If the last word of the query does not * have a space character following it, that word will be required as a prefix match only. * * For example: * The query "family test" will return results that contain 'Family Testudinidae' * The query "family test " will not match on 'Testudinidae', so that will be excluded. * @param sememeConceptSequence the sememe concept sequence * @param sizeLimit The maximum size of the result list. * @param targetGeneration target generation that must be included in the search or Long.MIN_VALUE if there is no need * to wait for a target generation. Long.MAX_VALUE can be passed in to force this query to wait until any in progress * indexing operations are completed - and then use the latest index. * @return a List of {@link SearchResult} that contains the nid of the component that matched, and the score of that match relative * to other matches. */ @Override public abstract List<SearchResult> query(String query, boolean prefixSearch, Integer[] sememeConceptSequence, int sizeLimit, Long targetGeneration); /** * Report indexed items. * * @return the hash map */ @Override public HashMap<String, Integer> reportIndexedItems() { final HashMap<String, Integer> result = new HashMap<>(); this.indexedComponentStatistics.forEach((name, value) -> { result.put(name, value.get()); }); return result; } /** * Adds the fields. * * @param chronicle the chronicle * @param doc the doc */ protected abstract void addFields(ObjectChronology<?> chronicle, Document doc); /** * Builds the prefix query. * * @param searchString the search string * @param field the field * @param analyzer the analyzer * @return the query * @throws IOException Signals that an I/O exception has occurred. 
*/ protected Query buildPrefixQuery(String searchString, String field, Analyzer analyzer) throws IOException { final TokenStream tokenStream; final List<String> terms; try (StringReader textReader = new StringReader(searchString)) { tokenStream = analyzer.tokenStream(field, textReader); tokenStream.reset(); terms = new ArrayList<>(); final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); while (tokenStream.incrementToken()) { terms.add(charTermAttribute.toString()); } } tokenStream.close(); analyzer.close(); final BooleanQuery bq = new BooleanQuery(); if ((terms.size() > 0) &&!searchString.endsWith(" ")) { final String last = terms.remove(terms.size() - 1); bq.add(new PrefixQuery((new Term(field, last))), Occur.MUST); } terms.stream().forEach((s) -> { bq.add(new TermQuery(new Term(field, s)), Occur.MUST); }); return bq; } /** * Create a query that will match on the specified text using either the WhitespaceAnalyzer or the StandardAnalyzer. * Uses the Lucene Query Parser if prefixSearch is false, otherwise, uses a custom prefix algorithm. * See {@link LuceneIndexer#query(String, boolean, Integer, int, Long)} for details on the prefix search algorithm. 
 *
    * @param query the query
    * @param field the field
    * @param prefixSearch the prefix search
    * @return the query
    */
   protected Query buildTokenizedStringQuery(String query, String field, boolean prefixSearch) {
      try {
         final BooleanQuery bq = new BooleanQuery();

         if (prefixSearch) {
            // Query both the analyzed and the whitespace-only variants of the field; either may match.
            bq.add(buildPrefixQuery(query, field, new PerFieldAnalyzer()), Occur.SHOULD);
            bq.add(buildPrefixQuery(query, field + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, new PerFieldAnalyzer()),
                   Occur.SHOULD);
         } else {
            final QueryParser qp1 = new QueryParser(field, new PerFieldAnalyzer());

            qp1.setAllowLeadingWildcard(true);
            bq.add(qp1.parse(query), Occur.SHOULD);

            final QueryParser qp2 = new QueryParser(field + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER,
                                                    new PerFieldAnalyzer());

            qp2.setAllowLeadingWildcard(true);
            bq.add(qp2.parse(query), Occur.SHOULD);
         }

         // Wrap, so the two SHOULD clauses above behave as a single required clause.
         final BooleanQuery wrap = new BooleanQuery();

         wrap.add(bq, Occur.MUST);
         return wrap;
      } catch (IOException | ParseException e) {
         throw new RuntimeException(e);
      }
   }

   /**
    * Increment indexed item count.
    *
    * Double-checked creation of the per-name counter, gated by a semaphore.
    * NOTE(review): the unsynchronized first get() on a plain HashMap races with concurrent put()s
    * from other indexing threads - confirm whether indexing here is effectively single-threaded;
    * otherwise indexedComponentStatistics should be a ConcurrentHashMap.
    *
    * @param name the name
    */
   protected void incrementIndexedItemCount(String name) {
      AtomicInteger temp = this.indexedComponentStatistics.get(name);

      if (temp == null) {
         try {
            this.indexedComponentStatisticsBlock.acquireUninterruptibly();
            temp = this.indexedComponentStatistics.get(name);

            if (temp == null) {
               temp = new AtomicInteger(0);
               this.indexedComponentStatistics.put(name, temp);
            }
         } finally {
            this.indexedComponentStatisticsBlock.release();
         }
      }

      temp.incrementAndGet();
   }

   /**
    * Index chronicle.
    *
    * @param chronicle the chronicle
    * @return true, if this indexer wants the chronicle indexed
    */
   protected abstract boolean indexChronicle(ObjectChronology<?> chronicle);

   /**
    * Release latch.  Wakes any caller blocked on {@link #getIndexedGenerationCallable(int)} for this nid.
    *
    * @param latchNid the latch nid
    * @param indexGeneration the index generation
    */
   protected void releaseLatch(int latchNid, long indexGeneration) {
      final IndexedGenerationCallable latch = this.componentNidLatch.remove(latchNid);

      if (latch != null) {
         latch.setIndexGeneration(indexGeneration);
      }
   }

   /**
    * Restrict to sememe.  ORs the given assemblage sequences together and ANDs that with the query;
    * a null / empty array returns the query unchanged.
    *
    * @param query the query
    * @param sememeConceptSequence the sememe concept sequence
    * @return the query
    */
   protected Query restrictToSememe(Query query, Integer[] sememeConceptSequence) {
      final ArrayList<Integer> nullSafe = new ArrayList<>();

      if (sememeConceptSequence != null) {
         for (final Integer i: sememeConceptSequence) {
            if (i != null) {
               nullSafe.add(i);
            }
         }
      }

      if (nullSafe.size() > 0) {
         final BooleanQuery outerWrap = new BooleanQuery();

         outerWrap.add(query, Occur.MUST);

         final BooleanQuery wrap = new BooleanQuery();

         // or together the sememeConceptSequences, but require at least one of them to match.
         for (final int i: nullSafe) {
            wrap.add(new TermQuery(new Term(FIELD_SEMEME_ASSEMBLAGE_SEQUENCE, i + "")), Occur.SHOULD);
         }

         outerWrap.add(wrap, Occur.MUST);
         return outerWrap;
      } else {
         return query;
      }
   }

   /**
    * Subclasses may call this method with much more specific queries than this generic class is capable of constructing.
    *
    * @param q - the query
    * @param sizeLimit - how many results to return (at most)
    * @param targetGeneration - target generation that must be included in the search or Long.MIN_VALUE if there is no need
    * to wait for a target generation.  Long.MAX_VALUE can be passed in to force this query to wait until any in progress
    * indexing operations are completed - and then use the latest index.
    * @param filter - an optional filter on results - if provided, the filter should expect nids, and can return true, if
    * the nid should be allowed in the result, false otherwise.
Note that this may cause large performance slowdowns, depending
    * on the implementation of your filter
    * @return the list
    */
   protected final List<SearchResult> search(Query q, int sizeLimit, Long targetGeneration, Predicate<Integer> filter) {
      try {
         // Honor the requested index generation before searching.
         if ((targetGeneration != null) && (targetGeneration != Long.MIN_VALUE)) {
            if (targetGeneration == Long.MAX_VALUE) {
               // Block until ALL pending writes are visible to the searcher.
               this.searcherManager.maybeRefreshBlocking();
            } else {
               try {
                  this.reopenThread.waitForGeneration(targetGeneration);
               } catch (final InterruptedException e) {
                  // NOTE(review): interrupt status is not restored before wrapping.
                  throw new RuntimeException(e);
               }
            }
         }

         final IndexSearcher searcher = this.searcherManager.acquire();

         try {
            log.debug("Running query: {}", q.toString());

            // Since the index carries some duplicates by design, which we will remove - get a few extra results up front.
            // so we are more likely to come up with the requested number of results
            final long limitWithExtras = sizeLimit + (long) (sizeLimit * 0.25d);
            final int  adjustedLimit   = ((limitWithExtras > Integer.MAX_VALUE) ? sizeLimit
                  : (int) limitWithExtras);
            TopDocs topDocs;

            if (filter != null) {
               final TopDocsFilteredCollector tdf = new TopDocsFilteredCollector(adjustedLimit, q, searcher, filter);

               searcher.search(q, tdf);
               topDocs = tdf.getTopDocs();
            } else {
               topDocs = searcher.search(q, adjustedLimit);
            }

            final List<SearchResult> results               = new ArrayList<>(topDocs.totalHits);
            final HashSet<Integer>   includedComponentNids = new HashSet<>();

            for (final ScoreDoc hit: topDocs.scoreDocs) {
               log.debug("Hit: {} Score: {}", new Object[] { hit.doc, hit.score });

               final Document doc          = searcher.doc(hit.doc);
               final int      componentNid = doc.getField(FIELD_COMPONENT_NID)
                                                .numericValue()
                                                .intValue();

               // Dedupe: the index may hold multiple documents per component (see AddDocument.call()).
               if (includedComponentNids.contains(componentNid)) {
                  continue;
               } else {
                  includedComponentNids.add(componentNid);
                  results.add(new ComponentSearchResult(componentNid, hit.score));

                  if (results.size() == sizeLimit) {
                     break;
                  }
               }
            }

            log.debug("Returning {} results from query", results.size());
            return results;
         } finally {
            // Always hand the searcher back to the manager, even on error.
            this.searcherManager.release(searcher);
         }
      } catch (final IOException ex) {
         throw new RuntimeException(ex);
      }
   }

   /**
    * Index.  Routes the (optional) write through the shared lucene writer service.
    *
    * @param documentSupplier the document supplier
    * @param indexChronicle decides whether this chronicle should be indexed at all
    * @param chronicleNid the chronicle nid
    * @return the future
    */
   private Future<Long> index(Supplier<AddDocument> documentSupplier, BooleanSupplier indexChronicle, int chronicleNid) {
      if (!this.enabled) {
         releaseLatch(chronicleNid, Long.MIN_VALUE);

         // NOTE(review): returns null when the indexer is disabled, but unindexedFuture when the
         // chronicle is simply not indexed - callers must handle both; confirm whether any caller
         // relies on the null.
         return null;
      }

      if (indexChronicle.getAsBoolean()) {
         final Future<Long> future = this.luceneWriterService.submit(documentSupplier.get());

         // FutureChecker logs any exception the write future eventually throws.
         this.luceneWriterFutureCheckerService.execute(new FutureChecker(future));
         return future;
      } else {
         releaseLatch(chronicleNid, Long.MIN_VALUE);
      }

      return unindexedFuture;
   }

   /**
    * Start me.
    */
   @PostConstruct
   private void startMe() {
      log.info("Starting " + getIndexerName() + " post-construct");
   }

   /**
    * Start thread.  Names and priority-boosts the NRT reopen thread, then starts it as a daemon.
    */
   private void startThread() {
      this.reopenThread.setName("Lucene " + this.indexName + " Reopen Thread");
      this.reopenThread.setPriority(Math.min(Thread.currentThread()
            .getPriority() + 2, Thread.MAX_PRIORITY));
      this.reopenThread.setDaemon(true);
      this.reopenThread.start();
   }

   /**
    * Stop me.  Flushes and closes the index on container shutdown.
    */
   @PreDestroy
   private void stopMe() {
      log.info("Stopping " + getIndexerName() + " pre-destroy. ");
      commitWriter();
      closeWriter();
   }

   //~--- get methods ---------------------------------------------------------

   /**
    * Gets the database folder.
    *
    * @return the database folder
    */
   @Override
   public Path getDatabaseFolder() {
      return this.indexFolder.toPath();
   }

   /**
    * Gets the database validity status.
    *
    * @return the database validity status
    */
   @Override
   public DatabaseValidity getDatabaseValidityStatus() {
      return this.databaseValidity;
   }

   /**
    * Checks if enabled.
    *
    * @return true, if enabled
    */
   @Override
   public boolean isEnabled() {
      return this.enabled;
   }

   //~--- set methods ---------------------------------------------------------

   /**
    * Sets the enabled.
 *
    * @param enabled the new enabled - when false, subsequent index(...) calls become no-ops
    */
   @Override
   public void setEnabled(boolean enabled) {
      this.enabled = enabled;
   }

   //~--- get methods ---------------------------------------------------------

   /**
    * Gets the indexed generation callable.
    *
    * @param nid for the component that the caller wished to wait until it's document is added to the index.
    * @return a {@link IndexedGenerationCallable} object that will block until this indexer has added the
    * document to the index. The {@link IndexedGenerationCallable#call()} method on the object will return the
    * index generation that contains the document, which can be used in search calls to make sure the generation
    * is available to the searcher.
    */
   @Override
   public IndexedGenerationCallable getIndexedGenerationCallable(int nid) {
      final IndexedGenerationCallable indexedLatch         = new IndexedGenerationCallable();
      final IndexedGenerationCallable existingIndexedLatch = this.componentNidLatch.putIfAbsent(nid, indexedLatch);

      // putIfAbsent: keep whichever latch won the race for this nid.
      if (existingIndexedLatch != null) {
         return existingIndexedLatch;
      }

      return indexedLatch;
   }

   /**
    * Gets the indexer folder.
    *
    * @return the indexer folder
    */
   @Override
   public File getIndexerFolder() {
      return this.indexFolder;
   }

   /**
    * Gets the indexer name.
    *
    * @return the indexer name
    */
   @Override
   public String getIndexerName() {
      return this.indexName;
   }

   //~--- inner classes -------------------------------------------------------

   /**
    * The Class AddDocument.  Builds and writes one lucene document for a chronicle.
    */
   private class AddDocument
            implements Callable<Long> {
      /** The chronicle to index. */
      ObjectChronology<?> chronicle = null;

      //~--- constructors -----------------------------------------------------

      /**
       * Instantiates a new adds the document.
       *
       * @param chronicle the chronicle
       */
      public AddDocument(ObjectChronology<?> chronicle) {
         this.chronicle = chronicle;
      }

      //~--- methods ----------------------------------------------------------

      /**
       * Call.
       *
       * @return the long - the index generation that contains the added document
       * @throws Exception the exception
       */
      @Override
      public Long call() throws Exception {
         final Document doc = new Document();

         doc.add(new IntField(FIELD_COMPONENT_NID,
                              this.chronicle.getNid(),
                              LuceneIndexer.FIELD_TYPE_INT_STORED_NOT_INDEXED));
         addFields(this.chronicle, doc);

         // Note that the addDocument operation could cause duplicate documents to be
         // added to the index if a new luceneVersion is added after initial index
         // creation. It does this to avoid the performance penalty of
         // finding and deleting documents prior to inserting a new one.
         //
         // At this point, the number of duplicates should be
         // small, and we are willing to accept a small number of duplicates
         // because the new versions are additive (we don't allow deletion of content)
         // so the search results will be the same. Duplicates can be removed
         // by regenerating the index.
         final long indexGeneration = LuceneIndexer.this.trackingIndexWriter.addDocument(doc);

         // Wake anyone waiting on this component's document to reach the index.
         releaseLatch(getNid(), indexGeneration);
         return indexGeneration;
      }

      //~--- get methods ------------------------------------------------------

      /**
       * Gets the nid.
       *
       * @return the nid
       */
      public int getNid() {
         return this.chronicle.getNid();
      }
   }

   /**
    * Class to ensure that any exceptions associated with indexingFutures are properly logged.
    */
   private static class FutureChecker
            implements Runnable {
      /** The future to check. */
      Future<Long> future;

      //~--- constructors -----------------------------------------------------

      /**
       * Instantiates a new future checker.
       *
       * @param future the future
       */
      public FutureChecker(Future<Long> future) {
         this.future = future;
      }

      //~--- methods ----------------------------------------------------------

      /**
       * Run.  Blocks on the future purely so any indexing exception gets surfaced in the log.
       */
      @Override
      public void run() {
         try {
            this.future.get();
         } catch (InterruptedException | ExecutionException ex) {
            log.fatal("Unexpected error in future checker!", ex);
         }
      }
   }

   /**
    * The Class UnindexedFuture.  An already-complete future returned for content this indexer does not index.
    */
   private static class UnindexedFuture
            implements Future<Long> {
      /**
       * Cancel.
* * @param mayInterruptIfRunning the may interrupt if running * @return true, if successful */ @Override public boolean cancel(boolean mayInterruptIfRunning) { return false; } //~--- get methods ------------------------------------------------------ /** * Checks if cancelled. * * @return true, if cancelled */ @Override public boolean isCancelled() { return false; } /** * Checks if done. * * @return true, if done */ @Override public boolean isDone() { return true; } /** * Gets the. * * @return the long * @throws InterruptedException the interrupted exception * @throws ExecutionException the execution exception */ @Override public Long get() throws InterruptedException, ExecutionException { return Long.MIN_VALUE; } /** * Gets the. * * @param timeout the timeout * @param unit the unit * @return the long * @throws InterruptedException the interrupted exception * @throws ExecutionException the execution exception * @throws TimeoutException the timeout exception */ @Override public Long get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { return Long.MIN_VALUE; } } }