package eu.europeana.cloud.service.dps.index;

import eu.europeana.cloud.service.dps.index.exception.IndexerException;
import eu.europeana.cloud.service.dps.index.structure.IndexedDocument;
import eu.europeana.cloud.service.dps.index.structure.IndexerInformations;
import eu.europeana.cloud.service.dps.index.structure.SearchResult;
import java.util.Map;

/**
 * Contract shared by all indexers: similarity search, text/phrase/Lucene-syntax search,
 * document insert/update/delete/lookup and paging through search results.
 *
 * @author Pavel Kefurt &lt;Pavel.Kefurt@gmail.com&gt;
 */
public interface Indexer {

    /*
     * Default values.
     * NOTE(review): presumably these are what the shorter overloads below apply for the
     * parameters they omit -- confirm against the implementations.
     * Interface fields are implicitly public static final, so the modifiers are omitted.
     */
    int PAGE_SIZE = 10;
    int TIMEOUT = 0;
    int MAX_QUERY_TERMS = -1;
    int MIN_TERM_FREQ = -1;
    int MIN_DOC_FREQ = -1;
    int MAX_DOC_FREQ = -1;
    int MIN_WORD_LENGTH = -1;
    int MAX_WORD_LENGTH = -1;
    Boolean INCLUDE_ITSELF = false;

    /**
     * Boolean operator; the value type of the {@code default_operator} setting accepted by
     * {@link #advancedSearch(String, Map)}.
     */
    enum Operator {
        AND,
        OR
    }

    /**
     * Returns the underlying indexer.
     *
     * @return the client instance that is used for indexing
     */
    Object getIndexer();

    /**
     * Returns the name of this indexer.
     *
     * @return a value from {@link SupportedIndexers}
     */
    SupportedIndexers getIndexerName();

    /**
     * Returns information about this indexer.
     *
     * @return an {@link IndexerInformations} instance
     */
    IndexerInformations getIndexerInformations();

    /**
     * Finds documents whose content is similar to a reference document.
     *
     * @param documentId index of the reference document
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult getMoreLikeThis(String documentId) throws IndexerException;

    /**
     * Finds documents whose content is similar to a reference document.
     *
     * @param documentId index of the reference document
     * @param size number of results on one page
     * @param timeout how long the search context should be kept alive, in milliseconds
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult getMoreLikeThis(String documentId, int size, int timeout)
            throws IndexerException;

    /**
     * Finds documents whose content is similar to a reference document.
     *
     * @param documentId index of the reference document
     * @param fields array of fields to search
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult getMoreLikeThis(String documentId, String[] fields) throws IndexerException;

    /**
     * Finds documents whose content is similar to a reference document.
     *
     * @param documentId index of the reference document
     * @param fields array of fields to search
     * @param size number of results on one page
     * @param timeout how long the search context should be kept alive, in milliseconds
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult getMoreLikeThis(String documentId, String[] fields, int size, int timeout)
            throws IndexerException;

    /**
     * Finds documents whose content is similar to a reference document, with full control
     * over term selection.
     *
     * @param documentId index of the reference document
     * @param fields array of fields to search
     * @param maxQueryTerms maximum number of query terms that will be selected
     * @param minTermFreq minimum term frequency below which terms of the input document
     *        are ignored
     * @param minDocFreq minimum document frequency below which terms of the input document
     *        are ignored
     * @param maxDocFreq maximum document frequency above which terms of the input document
     *        are ignored
     * @param minWordLength minimum word length below which terms are ignored
     * @param maxWordLength maximum word length above which terms are ignored
     * @param size number of results on one page
     * @param timeout how long the search context should be kept alive, in milliseconds
     * @param includeItself whether the input document should also be included in the
     *        search result
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult getMoreLikeThis(
            String documentId,
            String[] fields,
            int maxQueryTerms,
            int minTermFreq,
            int minDocFreq,
            int maxDocFreq,
            int minWordLength,
            int maxWordLength,
            int size,
            int timeout,
            Boolean includeItself) throws IndexerException;

    /**
     * Finds documents that contain the search text.
     *
     * @param text search text
     * @param fields array of fields to search
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult search(String text, String[] fields) throws IndexerException;

    /**
     * Finds documents that contain the search text.
     *
     * @param text search text
     * @param fields array of fields to search
     * @param size number of results on one page
     * @param timeout how long the search context should be kept alive, in milliseconds
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult search(String text, String[] fields, int size, int timeout)
            throws IndexerException;

    /**
     * Finds documents that contain the search text in the full text field.
     *
     * @param text search text
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult searchFullText(String text) throws IndexerException;

    /**
     * Finds documents that contain the search text in the full text field.
     *
     * @param text search text
     * @param size number of results on one page
     * @param timeout how long the search context should be kept alive, in milliseconds
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult searchFullText(String text, int size, int timeout) throws IndexerException;

    /**
     * Finds documents that contain the phrase in the full text field.
     *
     * @param text search text
     * @param slop number of term position moves (edits) allowed
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult searchPhraseInFullText(String text, int slop) throws IndexerException;

    /**
     * Finds documents that contain the phrase in the given field.
     *
     * @param text search text
     * @param field field to search
     * @param slop number of term position moves (edits) allowed
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult searchPhrase(String text, String field, int slop) throws IndexerException;

    /**
     * Finds documents that contain the phrase in the given field.
     *
     * @param text search text
     * @param field field to search
     * @param slop number of term position moves (edits) allowed
     * @param size number of results on one page
     * @param timeout how long the search context should be kept alive, in milliseconds
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult searchPhrase(String text, String field, int slop, int size, int timeout)
            throws IndexerException;

    /**
     * Searches documents using Lucene query syntax.
     *
     * @param query Lucene query
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult advancedSearch(String query) throws IndexerException;

    /**
     * Searches documents using Lucene query syntax.
     *
     * @param query Lucene query
     * @param size number of results on one page
     * @param timeout how long the search context should be kept alive, in milliseconds
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult advancedSearch(String query, int size, int timeout) throws IndexerException;

    /**
     * Searches documents using Lucene query syntax.
     *
     * @param query Lucene query
     * @param parameters optional settings, keyed by name (expected value type in brackets):
     *        <ul>
     *          <li>default_field [String]</li>
     *          <li>default_operator [Operator]</li>
     *          <li>analyzer [String]</li>
     *          <li>allow_loading_wildcard [Boolean]</li>
     *          <li>lowercase_expanded_terms [Boolean]</li>
     *          <li>enable_position_increments [Boolean]</li>
     *          <li>fuzzy_prefix_length [Integer]</li>
     *          <li>fuzzy_max_expansions [Integer]</li>
     *          <li>phrase_slop [Integer]</li>
     *          <li>boost [Float]</li>
     *          <li>analyze_wildcard [Boolean]</li>
     *          <li>auto_generate_phrase_queries [Boolean]</li>
     *          <li>max_determinized_states [Integer]</li>
     *          <li>lenient [Boolean]</li>
     *          <li>timeZone [String]</li>
     *        </ul>
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult advancedSearch(String query, Map<String, Object> parameters)
            throws IndexerException;

    /**
     * Searches documents using Lucene query syntax.
     *
     * @param query Lucene query
     * @param parameters optional settings, keyed by name (expected value type in brackets):
     *        <ul>
     *          <li>default_field [String]</li>
     *          <li>default_operator [Operator]</li>
     *          <li>analyzer [String]</li>
     *          <li>allow_loading_wildcard [Boolean]</li>
     *          <li>lowercase_expanded_terms [Boolean]</li>
     *          <li>enable_position_increments [Boolean]</li>
     *          <li>fuzzy_prefix_length [Integer]</li>
     *          <li>fuzzy_max_expansions [Integer]</li>
     *          <li>phrase_slop [Integer]</li>
     *          <li>boost [Float]</li>
     *          <li>analyze_wildcard [Boolean]</li>
     *          <li>auto_generate_phrase_queries [Boolean]</li>
     *          <li>max_determinized_states [Integer]</li>
     *          <li>lenient [Boolean]</li>
     *          <li>timeZone [String]</li>
     *        </ul>
     * @param size number of results on one page
     * @param timeout how long the search context should be kept alive, in milliseconds
     * @return the matching documents as a {@link SearchResult}
     * @throws IndexerException if the operation fails
     */
    SearchResult advancedSearch(
            String query, Map<String, Object> parameters, int size, int timeout)
            throws IndexerException;

    /**
     * Inserts data under a generated document id.
     *
     * @param data data as a JSON string
     *        (e.g. {@code {"field1":"value1","field2":"value2"}})
     * @throws IndexerException if the operation fails
     */
    void insert(String data) throws IndexerException;

    /**
     * Inserts data under a generated document id.
     *
     * @param data data as a map (key = field, value = data)
     * @throws IndexerException if the operation fails
     */
    void insert(Map<String, Object> data) throws IndexerException;

    /**
     * Inserts data under the given document id.
     *
     * @param documentId unique identifier
     * @param data data as a JSON string
     *        (e.g. {@code {"field1":"value1","field2":"value2"}})
     * @throws IndexerException if the operation fails
     */
    void insert(String documentId, String data) throws IndexerException;

    /**
     * Inserts data under the given document id.
     *
     * @param documentId unique identifier
     * @param data data as a map (key = field, value = data)
     * @throws IndexerException if the operation fails
     */
    void insert(String documentId, Map<String, Object> data) throws IndexerException;

    /**
     * Updates the data of a record.
     *
     * @param documentId unique identifier of the record to update
     * @param data data as a JSON string
     *        (e.g. {@code {"field1":"value1","field2":"value2"}})
     * @throws IndexerException if the operation fails
     */
    void update(String documentId, String data) throws IndexerException;

    /**
     * Updates the data of a record.
     *
     * @param documentId unique identifier of the record to update
     * @param data data as a map (key = field, value = data)
     * @throws IndexerException if the operation fails
     */
    void update(String documentId, Map<String, Object> data) throws IndexerException;

    /**
     * Deletes the record with the given document id.
     *
     * @param documentId unique identifier of the record to delete
     * @throws IndexerException if the operation fails
     */
    void delete(String documentId) throws IndexerException;

    /**
     * Retrieves a single document.
     *
     * @param documentId unique identifier of the document
     * @return the document as an {@link IndexedDocument}, or {@code null}
     *         (presumably when no such document exists -- confirm against implementations)
     * @throws IndexerException if the operation fails
     */
    IndexedDocument getDocument(String documentId) throws IndexerException;

    /**
     * Retrieves the next page of results of an earlier search.
     *
     * @param scrollId scroll id for the next page
     * @param context the last {@link SearchResult} object (recommended) or other
     *        information about the search
     * @return the next page as a {@link SearchResult}, or {@code null} if there is no
     *         more data
     * @throws IndexerException if the operation fails
     */
    SearchResult getNextPage(String scrollId, Object context) throws IndexerException;
}