package org.molgenis.data.elasticsearch; import com.google.common.util.concurrent.AtomicLongMap; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchType; import org.elasticsearch.client.Client; import org.elasticsearch.common.collect.FluentIterable; import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.molgenis.data.*; import org.molgenis.data.aggregation.AggregateQuery; import org.molgenis.data.aggregation.AggregateResult; import org.molgenis.data.elasticsearch.index.ElasticsearchIndexCreator; import org.molgenis.data.elasticsearch.index.MappingsBuilder; import org.molgenis.data.elasticsearch.request.SearchRequestGenerator; import org.molgenis.data.elasticsearch.response.ResponseParser; import org.molgenis.data.elasticsearch.util.ElasticsearchUtils; import org.molgenis.data.elasticsearch.util.SearchRequest; import org.molgenis.data.elasticsearch.util.SearchResult; import org.molgenis.data.meta.model.Attribute; import org.molgenis.data.meta.model.EntityType; import org.molgenis.data.support.QueryImpl; import org.molgenis.util.EntityUtils; import org.molgenis.util.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.UncheckedIOException; import java.util.List; import java.util.function.Consumer; import java.util.stream.Stream; import static java.util.Objects.requireNonNull; import static java.util.stream.Stream.concat; import static java.util.stream.StreamSupport.stream; import static org.molgenis.data.DataConverter.convert; import static org.molgenis.data.elasticsearch.util.ElasticsearchEntityUtils.toElasticsearchId; import static org.molgenis.data.elasticsearch.util.ElasticsearchEntityUtils.toElasticsearchIds; import static org.molgenis.data.elasticsearch.util.MapperTypeSanitizer.sanitizeMapperType; import static org.molgenis.data.support.EntityTypeUtils.createFetchForReindexing; /** * ElasticSearch implementation of the SearchService interface. * * @author erwin */ public class ElasticsearchService implements SearchService { private static final Logger LOG = LoggerFactory.getLogger(ElasticsearchService.class); private static final int BATCH_SIZE = 1000; public enum IndexingMode { ADD, UPDATE } private final DataService dataService; private final ElasticsearchEntityFactory elasticsearchEntityFactory; private final String indexName; private final ResponseParser responseParser = new ResponseParser(); private final ElasticsearchUtils elasticsearchFacade; private final SearchRequestGenerator searchRequestGenerator = new SearchRequestGenerator(); public ElasticsearchService(Client client, String indexName, DataService dataService, ElasticsearchEntityFactory elasticsearchEntityFactory) { this.indexName = requireNonNull(indexName); this.dataService = requireNonNull(dataService); this.elasticsearchEntityFactory = requireNonNull(elasticsearchEntityFactory); this.elasticsearchFacade = new ElasticsearchUtils(client); new ElasticsearchIndexCreator(client).createIndexIfNotExists(indexName); } @Override public Iterable<String> getTypes() { return () -> elasticsearchFacade.getMappings(indexName).keysIt(); } private SearchResult search(SearchRequest request) { // TODO : A quick fix now! Need to find a better way to get // EntityType in ElasticSearchService, because ElasticSearchService should not be // aware of DataService. E.g. Put EntityType in the SearchRequest object EntityType entityType = (request.getDocumentType() != null && dataService != null && dataService .hasRepository(request.getDocumentType())) ? dataService .getEntityType(request.getDocumentType()) : null; String documentType = request.getDocumentType() == null ? null : sanitizeMapperType(request.getDocumentType()); SearchResponse response = elasticsearchFacade .search(SearchType.QUERY_AND_FETCH, request, entityType, documentType, indexName); return responseParser.parseSearchResponse(request, response, entityType, dataService); } @Override public boolean hasMapping(EntityType entityType) { return hasMapping(entityType.getName()); } @Override public boolean hasMapping(String entityName) { return elasticsearchFacade.getMappings(indexName).containsKey(sanitizeMapperType(entityName)); } @Override public void createMappings(EntityType entityType) { createMappings(entityType, true, true); } private void createMappings(String index, EntityType entityType, boolean enableNorms, boolean createAllIndex) { try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) { MappingsBuilder.buildMapping(jsonBuilder, entityType, enableNorms, createAllIndex); elasticsearchFacade.putMapping(index, jsonBuilder, entityType.getName()); } catch (IOException e) { throw new UncheckedIOException(e); } } @Override public void createMappings(EntityType entityType, boolean enableNorms, boolean createAllIndex) { createMappings(indexName, entityType, enableNorms, createAllIndex); } @Override public void refresh() { refreshIndex(); } @Override public void refreshIndex() { elasticsearchFacade.refresh(indexName); } @Override public long count(EntityType entityType) { return count(null, entityType); } @Override public long count(Query<Entity> q, EntityType entityType) { String entityName = entityType.getName(); String type = sanitizeMapperType(entityName); return elasticsearchFacade.getCount(q, entityType, type, indexName); } @Override public void index(Entity entity, EntityType entityType, IndexingMode indexingMode) { LOG.debug("Indexing single {}.{} entity ...", entityType.getName(), entity.getIdValue()); index(Stream.of(entity), entityType, indexingMode == IndexingMode.UPDATE); } @Override public long index(Iterable<? extends Entity> entities, EntityType entityType, IndexingMode indexingMode) { LOG.debug("Indexing multiple {} entities...", entityType.getName()); return index(stream(entities.spliterator(), false), entityType, indexingMode == IndexingMode.UPDATE); } @Override public long index(Stream<? extends Entity> entities, EntityType entityType, IndexingMode indexingMode) { LOG.debug("Indexing multiple {} entities...", entityType.getName()); return index(entities, entityType, indexingMode == IndexingMode.UPDATE); } private long index(Stream<? extends Entity> entityStream, EntityType entityType, boolean addReferences) { String entityName = entityType.getName(); String type = sanitizeMapperType(entityName); Stream<IndexRequest> indexRequestStream = entityStream .flatMap(entity -> createIndexRequestStreamForEntity(entity, entityType, type, addReferences)); AtomicLongMap<String> counts = elasticsearchFacade.index(indexRequestStream, true); return counts.get(type); } /** * Creates an {@link IndexRequest} to reindex an entity. Optionally also creates {@link IndexRequest}s for referencing * entities. * * @param entity the entity that should be indexed * @param entityType the {@link EntityType} of the entity * @param type the sanitized mapping type of the entity * @param addRequestsForReferencingEntities boolean indicating if {@link IndexRequest}s should be added for all * referencing entities. * @return Stream of {@link IndexRequest}s for the entity */ private Stream<IndexRequest> createIndexRequestStreamForEntity(Entity entity, EntityType entityType, String type, boolean addRequestsForReferencingEntities) { Stream<IndexRequest> result = Stream.of(createIndexRequestForEntity(entity, entityType, type)); if (addRequestsForReferencingEntities) { result = concat(result, createIndexRequestsForReferencingEntities(entity, entityType)); } return result; } /** * Creates {@link IndexRequest}s for {@link Entity}s that have a reference to a particular entity instance * * @param entity the entity that is referenced by the entities that need to be updated * @param entityType {@link EntityType} of the referenced entity * @return Stream of {@link IndexRequest}s for the entities that reference entity. */ private Stream<IndexRequest> createIndexRequestsForReferencingEntities(Entity entity, EntityType entityType) { Stream<IndexRequest> references = Stream.of(); // Find entity metadata that is currently, in the database, referring to the entity we're reindexing for (Pair<EntityType, List<Attribute>> pair : EntityUtils.getReferencingEntityType(entityType, dataService)) { EntityType refEntityType = pair.getA(); // Search the index for referring documents of this type Stream<Entity> referringEntitiesStream = findReferringDocuments(entity, refEntityType, pair.getB()); // Get actual entities from the dataservice, skipping the ones that no longer exist and // fetching all of their attributes in one go referringEntitiesStream = dataService .findAll(refEntityType.getName(), referringEntitiesStream.map(Entity::getIdValue), createFetchForReindexing(refEntityType)); references = concat(references, referringEntitiesStream .map(referencingEntity -> createIndexRequestForEntity(referencingEntity, refEntityType, sanitizeMapperType(refEntityType.getName())))); } return references; } /** * Searches the index for documents of a certain type that contain a reference to a specific entity. * Uses {@link #searchInternalWithScanScroll(Query, EntityType)} to scroll through the existing referring * entities in a context that remains valid even when the documents are getting updated. * * @param referredEntity the entity that should be referred to in the documents * @param referringEntityType {@link EntityType} of the referring documents * @param referringAttributes {@link List} of {@link Attribute} of attributes that may reference the #referredEntity * @return Stream of {@link Entity} references representing the documents. */ private Stream<Entity> findReferringDocuments(Entity referredEntity, EntityType referringEntityType, List<Attribute> referringAttributes) { // Find out which documents of this type currently, in ElasticSearch, contain a reference to // the entity we're reindexing QueryImpl<Entity> q = null; for (Attribute attribute : referringAttributes) { if (q == null) { q = new QueryImpl<>(); } else { q.or(); } q.eq(attribute.getName(), referredEntity); } LOG.debug("q: [{}], referringEntityType: [{}]", q.toString(), referringEntityType.getName()); if (hasMapping(referringEntityType)) { return searchInternalWithScanScroll(q, referringEntityType); } else { return Stream.empty(); } } /** * Creates an IndexRequest for an entity in index {@link #indexName}. * * @param entity the entity that will be indexed * @param entityType {@link EntityType} of the entity * @param type sanitized mapper type of the entity, so it need not be recomputed */ private IndexRequest createIndexRequestForEntity(Entity entity, EntityType entityType, String type) { String id = toElasticsearchId(entity, entityType); XContentBuilder xContentBuilder = elasticsearchEntityFactory.create(entity); LOG.trace("Indexing [{}] with id [{}] in index [{}]...", type, id, indexName); return new IndexRequest().index(indexName).type(type).id(id).source(xContentBuilder); } @Override public void delete(Entity entity, EntityType entityType) { String elasticsearchId = toElasticsearchId(entity, entityType); deleteById(elasticsearchId, entityType); } @Override public void deleteById(String id, EntityType entityType) { deleteById(indexName, id, entityType.getName()); } private void deleteById(String index, String id, String entityFullName) { String type = sanitizeMapperType(entityFullName); elasticsearchFacade.deleteById(index, id, type); } @Override public void deleteById(Stream<String> ids, EntityType entityType) { ids.forEach(id -> deleteById(id, entityType)); } @Override public void delete(Iterable<? extends Entity> entities, EntityType entityType) { delete(stream(entities.spliterator(), true), entityType); } @Override public void delete(Stream<? extends Entity> entities, EntityType entityType) { Stream<Object> entityIds = entities.map(Entity::getIdValue); Iterators.partition(entityIds.iterator(), BATCH_SIZE).forEachRemaining( batchEntityIds -> deleteById(toElasticsearchIds(batchEntityIds.stream()), entityType)); } @Override public void delete(String entityName) { String type = sanitizeMapperType(entityName); if (elasticsearchFacade.isTypeExists(type, indexName) && !elasticsearchFacade.deleteMapping(type, indexName)) { throw new ElasticsearchException("Delete of mapping for type '" + type + "' failed."); } if (!elasticsearchFacade.deleteAllDocumentsOfType(type, indexName)) { throw new ElasticsearchException("Deleting all documents of type '" + type + "' failed."); } } @Override public Iterable<Entity> search(Query<Entity> q, final EntityType entityType) { return searchInternal(q, entityType); } @Override public Stream<Entity> searchAsStream(Query<Entity> q, EntityType entityType) { ElasticsearchEntityIterable searchInternal = searchInternal(q, entityType); return new EntityStream(searchInternal.stream(), true); } private ElasticsearchEntityIterable searchInternal(Query<Entity> q, EntityType entityType) { return new ElasticsearchEntityIterable(q, entityType, elasticsearchFacade, elasticsearchEntityFactory, searchRequestGenerator, indexName); } private Stream<Entity> searchInternalWithScanScroll(Query<Entity> query, EntityType entityType) { String type = sanitizeMapperType(entityType.getName()); Consumer<SearchRequestBuilder> searchRequestBuilderConsumer = searchRequestBuilder -> searchRequestGenerator .buildSearchRequest(searchRequestBuilder, type, SearchType.QUERY_AND_FETCH, query, null, null, null, entityType); return elasticsearchFacade .searchForIdsWithScanScroll(searchRequestBuilderConsumer, query.toString(), type, indexName) .map(idString -> convert(idString, entityType.getIdAttribute())) .map(idObject -> elasticsearchEntityFactory.getReference(entityType, idObject)); } @Override public AggregateResult aggregate(AggregateQuery aggregateQuery, final EntityType entityType) { Query<Entity> q = aggregateQuery.getQuery(); Attribute xAttr = aggregateQuery.getAttributeX(); Attribute yAttr = aggregateQuery.getAttributeY(); Attribute distinctAttr = aggregateQuery.getAttributeDistinct(); SearchRequest searchRequest = new SearchRequest(entityType.getName(), q, xAttr, yAttr, distinctAttr); SearchResult searchResults = search(searchRequest); return searchResults.getAggregate(); } @Override public void flush() { elasticsearchFacade.flushIndex(indexName); } @Override public void rebuildIndex(Repository<? extends Entity> repository) { EntityType entityType = repository.getEntityType(); if (hasMapping(entityType)) { LOG.debug("Delete index for repository {}...", repository.getName()); delete(entityType.getName()); } createMappings(entityType); LOG.trace("Indexing {} repository in batches of size {}...", repository.getName(), BATCH_SIZE); repository.forEachBatched(createFetchForReindexing(entityType), entities -> index(entities, entityType, IndexingMode.ADD), BATCH_SIZE); LOG.debug("Create index for repository {}...", repository.getName()); } @Override public void optimizeIndex() { elasticsearchFacade.optimizeIndex(indexName); } @Override public Entity findOne(Query<Entity> q, EntityType entityType) { return FluentIterable.from(search(q, entityType)).first().orNull(); } }