/*
* Licensed under the Apache License, Version 2.0 (the "License");
*
* You may not use this file except in compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributions from 2013-2017 were performed either by US government
* employees, or under US Veterans Health Administration contracts.
*
* US Veterans Health Administration contributions by government employees
* are work of the U.S. Government and are not subject to copyright
* protection in the United States. Portions contributed by government
* employees are USGovWork (17USC §105). Not subject to copyright.
*
* Contribution by contractors to the US Veterans Health Administration
* during this period are contractually contributed under the
* Apache License, Version 2.0.
*
* See: https://www.usa.gov/government-works
*
* Contributions prior to 2013:
*
* Copyright (C) International Health Terminology Standards Development Organisation.
* Licensed under the Apache License, Version 2.0.
*
*/
package sh.isaac.api.index;
//~--- JDK imports ------------------------------------------------------------
import java.io.File;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Future;
//~--- non-JDK imports --------------------------------------------------------
import org.jvnet.hk2.annotations.Contract;
import sh.isaac.api.DatabaseServices;
import sh.isaac.api.chronicle.ObjectChronology;
//~--- interfaces -------------------------------------------------------------
/**
* The contract interface for indexing services.
* <p>
* {@code IndexServiceBI} implementations must not throw exceptions. Throwing
* exceptions could corrupt the underlying source data. Since indexes can be
* regenerated, an index should instead mark itself as invalid somehow, and
* recreate itself when necessary.
*
* @author aimeefurber
* @author kec
*/
@Contract
public interface IndexServiceBI
extends DatabaseServices {
/**
* Clears the index, resulting in an empty index. Used prior to the
* environment recreating the index by iterating over all components
* and calling {@link #index(ObjectChronology)} with each component of
* the iteration. May be used for initial index creation, or if indexing
* properties have changed.
*/
void clearIndex();
/**
* Zeroes out the statistics that would be reported by {@link #reportIndexedItems()}.
*/
void clearIndexedStatistics();
/**
* Closes the index writer as part of normal shutdown.
*/
void closeWriter();
/**
* Checkpoints the index writer.
*/
void commitWriter();
/**
* Optionally called to maximize search performance.
* {@code forceMerge} is a costly operation, so generally call it only when
* the index is relatively static (after finishing a bulk addition of
* documents).
*/
void forceMerge();
/**
* Indexes the chronicle in a manner appropriate to the indexer
* implementation. The implementation is responsible for determining whether
* the component is appropriate for indexing. All changed components will be
* sent to all indexers for indexing. The implementation must not perform
* lengthy operations on the calling thread.
*
* @param chronicle the chronicle to index
* @return a {@code Future<Long>} for the index generation to which this
* chronicle is attached. If this chronicle is not indexed by this indexer,
* the Future returns {@code Long.MIN_VALUE}. The generation can be used
* with searchers to make sure that the component's indexing is complete
* prior to performing a search where the chronicle's results must be
* included.
*/
Future<Long> index(ObjectChronology<?> chronicle);
/**
* Locates the concept most closely tied to each search result, and merges
* the results together on that concept, maintaining the best score.
* This is a convenience method.
*
* @param searchResult the search results to merge
* @return the merged results, in a collection that iterates in the same
* order as the results were passed in.
*/
Collection<ConceptSearchResult> mergeResultsOnConcept(List<SearchResult> searchResult);
/**
* Queries the index with no specified target generation of the index.
*
* @param query The query to apply.
* @param sizeLimit The maximum size of the result list.
* @return a List of {@code SearchResult} that contains the nid of the
* component that matched, and the score of that match relative to other
* matches.
*/
List<SearchResult> query(String query, int sizeLimit);
/**
* Queries the index with the specified target generation of the index.
*
* @param query The query to apply.
* @param sememeConceptSequence the sememe concept sequences; presumably
* used to restrict the search to those sememes - TODO confirm against the
* implementation.
* @param sizeLimit The maximum size of the result list. Pass
* {@code Integer.MAX_VALUE} for unlimited results.
* @param targetGeneration (optional) target generation that must be
* included in the search, or {@code Long.MIN_VALUE} if there is no need to
* wait for a target generation. {@code Long.MAX_VALUE} can be passed in to
* force this query to wait until any in-progress indexing operations are
* completed, and then use the latest index. {@code null} behaves the same
* as {@code Long.MIN_VALUE}.
* @return a List of {@code SearchResult} that contains the nid of the
* component that matched, and the score of that match relative to other
* matches.
*/
List<SearchResult> query(String query, Integer[] sememeConceptSequence, int sizeLimit, Long targetGeneration);
/**
* Queries the index with the specified target generation of the index,
* optionally using a prefix-optimized search.
*
* @param query The query to apply.
* @param prefixSearch if true, utilize a search algorithm that is optimized
* for prefix searching, such as the searching that would be done to
* implement a type-ahead style search. Does not use the Lucene Query
* parser. Every term (or token) that is part of the query string will be
* required to be found in the result.
* <p>
* Note, it is useful to NOT trim the text of the query before it is sent
* in - if the last word of the query has a space character following it,
* that word will be required as a complete term. If the last word of the
* query does not have a space character following it, that word will be
* required as a prefix match only.
* <p>
* For example:<br>
* The query "family test" will return results that contain 'Family Testudinidae'<br>
* The query "family test " will not match on 'Testudinidae', so that will be excluded.
* @param sememeConceptSequence the sememe concept sequences; presumably
* used to restrict the search to those sememes - TODO confirm against the
* implementation.
* @param sizeLimit The maximum size of the result list. Pass
* {@code Integer.MAX_VALUE} for unlimited results.
* @param targetGeneration target generation that must be included in the
* search, or {@code Long.MIN_VALUE} if there is no need to wait for a
* target generation. {@code Long.MAX_VALUE} can be passed in to force this
* query to wait until any in-progress indexing operations are completed,
* and then use the latest index.
* @return a List of {@link SearchResult} that contains the nid of the
* component that matched, and the score of that match relative to other
* matches.
*/
List<SearchResult> query(String query,
boolean prefixSearch,
Integer[] sememeConceptSequence,
int sizeLimit,
Long targetGeneration);
/**
* Reports statistics on indexed items.
*
* @return name / value pairs that give statistics on the number of things
* indexed since the last time {@link #clearIndexedStatistics()} was called.
*/
HashMap<String, Integer> reportIndexedItems();
//~--- get methods ---------------------------------------------------------
/**
* Checks whether this indexer is enabled.
*
* @return true if this indexer is enabled.
*/
boolean isEnabled();
//~--- set methods ---------------------------------------------------------
/**
* Enables or disables an indexer. A disabled indexer will take
* no action when the index method is called.
*
* @param enabled true if the indexer is enabled, otherwise false.
*/
void setEnabled(boolean enabled);
//~--- get methods ---------------------------------------------------------
/**
* Gets the indexed generation callable.
*
* @param nid the nid of the component whose document the caller wishes to
* wait for until it has been added to the index.
* @return a {@code Callable<Long>} object that will block until this
* indexer has added the document to the index. The {@code call()} method
* on the object will return the index generation that contains the document,
* which can be used in search calls to make sure the generation is available
* to the searcher.
*/
IndexedGenerationCallable getIndexedGenerationCallable(int nid);
/**
* Gets the indexer folder.
*
* @return File representing the folder where the indexer stores its files.
*/
File getIndexerFolder();
/**
* Gets the indexer name.
*
* @return the name of this indexer.
*/
String getIndexerName();
}