package org.molgenis.data.elasticsearch.util; import com.codepoetics.protonpack.StreamUtils; import com.google.common.util.concurrent.AtomicLongMap; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.admin.indices.exists.types.TypesExistsResponse; import org.elasticsearch.action.admin.indices.mapping.delete.DeleteMappingResponse; import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse; import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse; import org.elasticsearch.action.bulk.BulkProcessor; import org.elasticsearch.action.deletebyquery.DeleteByQueryResponse; import org.elasticsearch.action.deletebyquery.IndexDeleteByQueryResponse; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchType; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.client.Client; import org.elasticsearch.cluster.metadata.MappingMetaData; import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.molgenis.data.Entity; import org.molgenis.data.MolgenisDataException; import org.molgenis.data.Query; import org.molgenis.data.elasticsearch.request.SearchRequestGenerator; import org.molgenis.data.meta.model.EntityType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.Arrays; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.stream.Stream; import static org.elasticsearch.client.Requests.refreshRequest; import static org.molgenis.data.elasticsearch.util.MapperTypeSanitizer.sanitizeMapperType; /** * Facade in front of the ElasticSearch client. */ public class ElasticsearchUtils { private static final Logger LOG = LoggerFactory.getLogger(ElasticsearchUtils.class); private static final TimeValue SCROLL_KEEP_ALIVE = new TimeValue(5, TimeUnit.MINUTES); private static final int SCROLL_SIZE = 1000; private final Client client; private final SearchRequestGenerator generator = new SearchRequestGenerator(); private final BulkProcessorFactory bulkProcessorFactory; public ElasticsearchUtils(Client client) { this(client, new BulkProcessorFactory()); } public ElasticsearchUtils(Client client, BulkProcessorFactory bulkProcessorFactory) { this.client = client; this.bulkProcessorFactory = bulkProcessorFactory; } public boolean indexExists(String index) { return client.admin().indices().prepareExists(index).execute().actionGet().isExists(); } // Wait until elasticsearch is ready public void waitForYellowStatus() { client.admin().cluster().prepareHealth().setWaitForYellowStatus().execute().actionGet(); } private void refreshIndex(String index) { client.admin().indices().refresh(refreshRequest(index)).actionGet(); } void waitForCompletion(BulkProcessor bulkProcessor) { LOG.trace("waitForCompletion..."); try { boolean isCompleted = bulkProcessor.awaitClose(Long.MAX_VALUE, TimeUnit.NANOSECONDS); if (!isCompleted) { throw new MolgenisDataException("Failed to complete bulk request within the given time"); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException(e); } finally { LOG.debug("bulkProcessor closed."); } } public ImmutableOpenMap<String, MappingMetaData> getMappings(String indexName) { LOG.trace("Retrieving Elasticsearch mappings ..."); GetMappingsResponse mappingsResponse = client.admin().indices().prepareGetMappings(indexName).get(); LOG.debug("Retrieved Elasticsearch mappings"); return mappingsResponse.getMappings().get(indexName); } public void putMapping(String index, XContentBuilder jsonBuilder, String entityName) throws IOException { if (LOG.isTraceEnabled()) LOG.trace("Creating Elasticsearch mapping [{}] ...", jsonBuilder.string()); PutMappingResponse response = client.admin().indices().preparePutMapping(index) .setType(sanitizeMapperType(entityName)).setSource(jsonBuilder).get(); if (!response.isAcknowledged()) { throw new ElasticsearchException( "Creation of mapping for documentType [" + entityName + "] failed. Response=" + response); } if (LOG.isDebugEnabled()) LOG.debug("Created Elasticsearch mapping [{}]", jsonBuilder.string()); } public void refresh(String index) { LOG.trace("Refreshing Elasticsearch index [{}] ...", index); refreshIndex(index); LOG.debug("Refreshed Elasticsearch index [{}]", index); } public long getCount(Query<Entity> q, EntityType entityType, String type, String indexName) { if (q != null) { LOG.trace("Counting Elasticsearch [{}] docs using query [{}] ...", type, q); } else { LOG.trace("Counting Elasticsearch [{}] docs", type); } SearchRequestBuilder searchRequestBuilder = client.prepareSearch(indexName); generator.buildSearchRequest(searchRequestBuilder, type, SearchType.COUNT, q, null, null, null, entityType); SearchResponse searchResponse = searchRequestBuilder.get(); if (searchResponse.getFailedShards() > 0) { throw new ElasticsearchException("Search failed. Returned headers:" + searchResponse.getHeaders()); } long count = searchResponse.getHits().totalHits(); long ms = searchResponse.getTookInMillis(); if (q != null) { LOG.debug("Counted {} Elasticsearch [{}] docs using query [{}] in {}ms", count, type, q, ms); } else { LOG.debug("Counted {} Elasticsearch [{}] docs in {}ms", count, type, ms); } return count; } public void optimizeIndex(String indexName) { LOG.trace("Optimizing Elasticsearch index [{}] ...", indexName); // setMaxNumSegments(1) fully optimizes the index OptimizeResponse response = client.admin().indices().prepareOptimize(indexName).setMaxNumSegments(1).get(); if (response.getFailedShards() > 0) { throw new ElasticsearchException("Optimize failed. Returned headers:" + response.getHeaders()); } LOG.debug("Optimized Elasticsearch index [{}]", indexName); } /** * Deletes a document from an index. * * @param index the name of the index * @param id the ID of the document * @param type tye type of the document */ public void deleteById(String index, String id, String type) { LOG.trace("Deleting Elasticsearch '{}' doc with id [{}] ...", type, id); GetResponse response = client.prepareGet(index, type, id).get(); LOG.debug("Retrieved document type [{}] with id [{}] in index [{}]", type, id, index); if (response.isExists()) { client.prepareDelete(index, type, id).get(); } LOG.debug("Deleted Elasticsearch '{}' doc with id [{}]", type, id); } /** * Checks if a type exists in an index. * * @param type the name of the type * @param indexName the name of the index * @return boolean indicating if the type exists in the index */ public boolean isTypeExists(String type, String indexName) { LOG.trace("Check whether type [{}] exists in index [{}]...", type, indexName); TypesExistsResponse typesExistsResponse = client.admin().indices().prepareTypesExists(indexName).setTypes(type) .get(); boolean typeExists = typesExistsResponse.isExists(); LOG.trace("Checked whether type [{}] exists in index [{}]", type, indexName); return typeExists; } /** * Tries to delete the mapping for a type in an index. * * @param type name of the type * @param indexName name of the index * @return boolean indicating success of the deletion */ public boolean deleteMapping(String type, String indexName) { DeleteMappingResponse deleteMappingResponse = client.admin().indices().prepareDeleteMapping(indexName) .setType(type).get(); return deleteMappingResponse.isAcknowledged(); } /** * Deletes all documents of a type in an index. * * @param type tye name of the type of the documents * @param indexName the name of the index * @return boolean indicating success of the deletion */ public boolean deleteAllDocumentsOfType(String type, String indexName) { LOG.trace("Deleting all Elasticsearch '{}' docs ...", type); DeleteByQueryResponse deleteByQueryResponse = client.prepareDeleteByQuery(indexName) .setQuery(new TermQueryBuilder("_type", type)).get(); if (deleteByQueryResponse != null) { IndexDeleteByQueryResponse idbqr = deleteByQueryResponse.getIndex(indexName); if (idbqr != null && idbqr.getFailedShards() > 0) { return false; } } LOG.debug("Deleted all Elasticsearch '{}' docs.", type); return true; } public void flushIndex(String indexName) { LOG.trace("Flushing Elasticsearch index [{}] ...", indexName); client.admin().indices().prepareFlush(indexName).get(); LOG.debug("Flushed Elasticsearch index [{}]", indexName); } public SearchResponse search(SearchType searchType, SearchRequest request, EntityType entityType, String documentType, String indexName) { SearchRequestBuilder builder = client.prepareSearch(indexName); generator .buildSearchRequest(builder, documentType, searchType, request.getQuery(), request.getAggregateField1(), request.getAggregateField2(), request.getAggregateFieldDistinct(), entityType); LOG.trace("*** REQUEST\n{}", builder); SearchResponse response = builder.get(); LOG.trace("*** RESPONSE\n{}", response); return response; } /** * Performs a search query and returns the result as a {@link Stream} of ID strings. */ public Stream<String> searchForIds(Consumer<SearchRequestBuilder> queryBuilder, String queryToString, String type, String indexName) { SearchHits searchHits = search(queryBuilder, queryToString, type, indexName); return Arrays.stream(searchHits.hits()).map(SearchHit::getId); } /** * Performs a search query and returns the result as a {@link Stream} of ID strings. */ public Stream<String> searchForIdsWithScanScroll(Consumer<SearchRequestBuilder> queryBuilder, String queryToString, String type, String indexName) { LOG.trace("Searching Elasticsearch '{}' docs using query [{}] ...", type, queryToString); SearchRequestBuilder searchRequestBuilder = client.prepareSearch(indexName); queryBuilder.accept(searchRequestBuilder); searchRequestBuilder.setScroll(SCROLL_KEEP_ALIVE).setSize(SCROLL_SIZE); LOG.trace("SearchRequest: {}", searchRequestBuilder); SearchResponse originalSearchResponse = searchRequestBuilder.execute().actionGet(); LOG.debug("Searched Elasticsearch '{}' docs using query [{}] in {}ms", type, queryToString, originalSearchResponse.getTookInMillis()); Stream<SearchResponse> infiniteResponses = Stream.iterate(originalSearchResponse, searchResponse -> client.prepareSearchScroll(searchResponse.getScrollId()).setScroll(SCROLL_KEEP_ALIVE) .execute().actionGet()); return StreamUtils.takeWhile(infiniteResponses, searchResponse -> searchResponse.getHits().getHits().length > 0) .flatMap(searchResponse -> Arrays.stream(searchResponse.getHits().getHits())).map(SearchHit::getId); } private SearchHits search(Consumer<SearchRequestBuilder> queryBuilder, String queryToString, String type, String indexName) { LOG.trace("Searching Elasticsearch '{}' docs using query [{}] ...", type, queryToString); SearchRequestBuilder searchRequestBuilder = client.prepareSearch(indexName); queryBuilder.accept(searchRequestBuilder); LOG.trace("SearchRequest: {}", searchRequestBuilder); SearchResponse searchResponse = searchRequestBuilder.execute().actionGet(); if (searchResponse.getFailedShards() > 0) { StringBuilder sb = new StringBuilder("Search failed."); for (ShardSearchFailure failure : searchResponse.getShardFailures()) { sb.append("\n").append(failure.reason()); } throw new ElasticsearchException(sb.toString()); } LOG.debug("Searched Elasticsearch '{}' docs using query [{}] in {}ms", type, queryToString, searchResponse.getTookInMillis()); return searchResponse.getHits(); } /** * Creates a {@link BulkProcessor} and adds a stream of {@link IndexRequest}s to it. * Counts how many requests of each type were added to the {@link BulkProcessor}. * * @param requests the {@link IndexRequest}s to add * @param awaitCompletion indication if the completion of the requests should be awaited synchronously * @return AtomicLongMap containing per type how many requests of that type were added. */ public AtomicLongMap<String> index(Stream<IndexRequest> requests, boolean awaitCompletion) { AtomicLongMap<String> nrIndexedEntitiesPerType = AtomicLongMap.create(); BulkProcessor bulkProcessor = bulkProcessorFactory.create(client); try { requests.forEachOrdered(request -> { LOG.trace("Indexing [{}] with id [{}] in index [{}]...", request.type(), request.id(), request.index()); nrIndexedEntitiesPerType.incrementAndGet(request.type()); bulkProcessor.add(request); }); return nrIndexedEntitiesPerType; } finally { if (awaitCompletion) { waitForCompletion(bulkProcessor); } } } }