/*
* (C) Copyright 2014-2016 Nuxeo SA (http://nuxeo.com/) and others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributors:
* Tiry
* bdelbosc
*/
package org.nuxeo.elasticsearch.core;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.nuxeo.elasticsearch.ElasticSearchConstants.CHILDREN_FIELD;
import static org.nuxeo.elasticsearch.ElasticSearchConstants.DOC_TYPE;
import static org.nuxeo.elasticsearch.ElasticSearchConstants.INDEX_BULK_MAX_SIZE_PROPERTY;
import static org.nuxeo.elasticsearch.ElasticSearchConstants.PATH_FIELD;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequestBuilder;
import org.elasticsearch.action.get.GetRequestBuilder;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.engine.VersionConflictEngineException;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.nuxeo.common.logging.SequenceTracer;
import org.nuxeo.ecm.automation.jaxrs.io.documents.JsonESDocumentWriter;
import org.nuxeo.ecm.core.api.ConcurrentUpdateException;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.DocumentNotFoundException;
import org.nuxeo.ecm.core.api.NuxeoException;
import org.nuxeo.elasticsearch.api.ElasticSearchIndexing;
import org.nuxeo.elasticsearch.commands.IndexingCommand;
import org.nuxeo.elasticsearch.commands.IndexingCommand.Type;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.metrics.MetricsService;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.SharedMetricRegistries;
import com.codahale.metrics.Timer;
import com.codahale.metrics.Timer.Context;
/**
* @since 6.0
*/
/**
 * Implementation of the {@link ElasticSearchIndexing} service: translates Nuxeo
 * {@link IndexingCommand}s into Elasticsearch index/delete requests, executed either
 * one by one or grouped into bulk requests, with metrics reported through Codahale timers.
 *
 * @since 6.0
 */
public class ElasticSearchIndexingImpl implements ElasticSearchIndexing {

    private static final Log log = LogFactory.getLog(ElasticSearchIndexingImpl.class);

    // debug curl line max size
    private static final int MAX_CURL_LINE = 8 * 1024;

    // send the bulk indexing command when this size is reached, optimal is 5-10m
    private static final int DEFAULT_MAX_BULK_SIZE = 5 * 1024 * 1024;

    /** Admin component giving access to the Elasticsearch client and index names. */
    private final ElasticSearchAdminImpl esa;

    private final Timer deleteTimer;

    private final Timer indexTimer;

    private final Timer bulkIndexTimer;

    /** When true, indexing requests carry an external version taken from the command order. */
    private final boolean useExternalVersion;

    /** Writer converting a DocumentModel to its Elasticsearch JSON representation. */
    private JsonESDocumentWriter jsonESDocumentWriter;

    public ElasticSearchIndexingImpl(ElasticSearchAdminImpl esa) {
        this.esa = esa;
        MetricRegistry registry = SharedMetricRegistries.getOrCreate(MetricsService.class.getName());
        indexTimer = registry.timer(MetricRegistry.name("nuxeo", "elasticsearch", "service", "index"));
        deleteTimer = registry.timer(MetricRegistry.name("nuxeo", "elasticsearch", "service", "delete"));
        bulkIndexTimer = registry.timer(MetricRegistry.name("nuxeo", "elasticsearch", "service", "bulkIndex"));
        this.jsonESDocumentWriter = new JsonESDocumentWriter();// default writer
        this.useExternalVersion = esa.useExternalVersion();
    }

    /**
     * Constructor allowing a custom JSON document writer.
     *
     * @since 7.2
     */
    public ElasticSearchIndexingImpl(ElasticSearchAdminImpl esa, JsonESDocumentWriter jsonESDocumentWriter) {
        this(esa);
        this.jsonESDocumentWriter = jsonESDocumentWriter;
    }

    @Override
    public void runIndexingWorker(List<IndexingCommand> cmds) {
        throw new UnsupportedOperationException("Not implemented");
    }

    @Override
    public void runReindexingWorker(String repositoryName, String nxql) {
        throw new UnsupportedOperationException("Not implemented");
    }

    @Override
    public void indexNonRecursive(List<IndexingCommand> cmds) {
        int nbCommands = cmds.size();
        if (nbCommands == 1) {
            // single command: skip the bulk machinery
            indexNonRecursive(cmds.get(0));
            return;
        }
        // simulate long indexing
        // try {Thread.sleep(1000);} catch (InterruptedException e) { }
        // deletes are executed individually first, then the remaining commands in bulk
        processBulkDeleteCommands(cmds);
        try (Context ignored = bulkIndexTimer.time()) {
            processBulkIndexCommands(cmds);
        }
        esa.totalCommandProcessed.addAndGet(nbCommands);
        refreshIfNeeded(cmds);
    }

    /** Executes the DELETE commands of the batch one by one, timing each deletion. */
    void processBulkDeleteCommands(List<IndexingCommand> cmds) {
        // Can be optimized with a single delete by query
        for (IndexingCommand cmd : cmds) {
            if (cmd.getType() == Type.DELETE) {
                try (Context ignored = deleteTimer.time()) {
                    processDeleteCommand(cmd);
                }
            }
        }
    }

    /**
     * Builds and submits bulk index requests for all non-delete commands, flushing a
     * partial bulk whenever the accumulated payload exceeds the configured max size.
     */
    void processBulkIndexCommands(List<IndexingCommand> cmds) {
        BulkRequestBuilder bulkRequest = esa.getClient().prepareBulk();
        Set<String> docIds = new HashSet<>(cmds.size());
        int bulkSize = 0;
        final int maxBulkSize = getMaxBulkSize();
        for (IndexingCommand cmd : cmds) {
            if (cmd.getType() == Type.DELETE || cmd.getType() == Type.UPDATE_DIRECT_CHILDREN) {
                continue;
            }
            if (!docIds.add(cmd.getTargetDocumentId())) {
                // do not submit the same doc 2 times
                continue;
            }
            try {
                IndexRequestBuilder idxRequest = buildEsIndexingRequest(cmd);
                if (idxRequest != null) {
                    bulkSize += idxRequest.request().source().length();
                    bulkRequest.add(idxRequest);
                }
            } catch (ConcurrentUpdateException e) {
                throw e; // bubble up, usually until AbstractWork catches it and maybe retries
            } catch (DocumentNotFoundException e) {
                log.info("Ignore indexing command in bulk, doc does not exists anymore: " + cmd);
            } catch (IllegalStateException e) {
                // buildEsIndexingRequest throws IllegalStateException when the command is not
                // attached to a session; skip the command as processIndexCommand does, instead
                // of letting it abort the whole bulk batch
                log.error("Ignore indexing command in bulk, fail to create request: " + cmd, e);
            }
            if (bulkSize > maxBulkSize) {
                log.warn("Max bulk size reached " + bulkSize + ", sending bulk command");
                sendBulkCommand(bulkRequest, bulkSize);
                bulkRequest = esa.getClient().prepareBulk();
                bulkSize = 0;
            }
        }
        // send the remaining requests (no-op when the bulk is empty)
        sendBulkCommand(bulkRequest, bulkSize);
    }

    /** Returns the max bulk payload size in bytes, read from the framework property. */
    int getMaxBulkSize() {
        String value = Framework.getProperty(INDEX_BULK_MAX_SIZE_PROPERTY, String.valueOf(DEFAULT_MAX_BULK_SIZE));
        return Integer.parseInt(value);
    }

    /** Executes a bulk request if it holds any action, logging conflicts and failures. */
    void sendBulkCommand(BulkRequestBuilder bulkRequest, int bulkSize) {
        if (bulkRequest.numberOfActions() > 0) {
            if (log.isDebugEnabled()) {
                logDebugMessageTruncated(String.format(
                        "Index %d docs (%d bytes) in bulk request: curl -XPOST 'http://localhost:9200/_bulk' -d '%s'",
                        bulkRequest.numberOfActions(), bulkSize,
                        bulkRequest.request().requests().toString()), MAX_CURL_LINE);
            }
            BulkResponse response = bulkRequest.execute().actionGet();
            if (response.hasFailures()) {
                logBulkFailure(response);
            }
        }
    }

    /**
     * Logs bulk failures: version conflicts (a more recent doc already indexed) are
     * expected and logged at debug level; any other failure is logged as an error.
     */
    void logBulkFailure(BulkResponse response) {
        boolean isError = false;
        StringBuilder sb = new StringBuilder();
        sb.append("Ignore indexing of some docs more recent versions has already been indexed");
        for (BulkItemResponse item : response.getItems()) {
            if (item.isFailed()) {
                if (item.getFailure().getStatus() == RestStatus.CONFLICT) {
                    sb.append("\n ").append(item.getFailureMessage());
                } else {
                    isError = true;
                }
            }
        }
        if (isError) {
            log.error(response.buildFailureMessage());
        } else {
            log.debug(sb);
        }
    }

    /** Refreshes the index once if any command of the batch is synchronous. */
    void refreshIfNeeded(List<IndexingCommand> cmds) {
        for (IndexingCommand cmd : cmds) {
            if (refreshIfNeeded(cmd))
                return;
        }
    }

    /** Refreshes the index when the command is synchronous; returns true if refreshed. */
    boolean refreshIfNeeded(IndexingCommand cmd) {
        if (cmd.isSync()) {
            esa.refresh();
            return true;
        }
        return false;
    }

    @Override
    public void indexNonRecursive(IndexingCommand cmd) {
        Type type = cmd.getType();
        if (type == Type.UPDATE_DIRECT_CHILDREN) {
            // the parent don't need to be indexed
            return;
        }
        if (type == Type.DELETE) {
            try (Context ignored = deleteTimer.time()) {
                processDeleteCommand(cmd);
            }
        } else {
            try (Context ignored = indexTimer.time()) {
                processIndexCommand(cmd);
            }
        }
        refreshIfNeeded(cmd);
        esa.totalCommandProcessed.incrementAndGet();
    }

    /**
     * Builds and executes a single index request for the command. Version conflicts
     * are ignored (a more recent version has already been indexed); a vanished target
     * document cancels the command.
     */
    void processIndexCommand(IndexingCommand cmd) {
        IndexRequestBuilder request;
        try {
            request = buildEsIndexingRequest(cmd);
        } catch (DocumentNotFoundException e) {
            request = null;
        } catch (IllegalStateException e) {
            log.error("Fail to create request for indexing command: " + cmd, e);
            return;
        }
        if (request == null) {
            log.info("Cancel indexing command because target document does not exists anymore: " + cmd);
            return;
        }
        if (log.isDebugEnabled()) {
            logDebugMessageTruncated(String.format("Index request: curl -XPUT 'http://localhost:9200/%s/%s/%s' -d '%s'",
                    esa.getIndexNameForRepository(cmd.getRepositoryName()), DOC_TYPE, cmd.getTargetDocumentId(),
                    request.request().toString()), MAX_CURL_LINE);
        }
        try {
            request.execute().actionGet();
        } catch (VersionConflictEngineException e) {
            SequenceTracer.addNote("Ignore indexing of doc " + cmd.getTargetDocumentId());
            log.info("Ignore indexing of doc " + cmd.getTargetDocumentId()
                    + " a more recent version has already been indexed: " + e.getMessage());
        }
    }

    /** Logs a debug message, truncated to maxSize unless trace level is enabled. */
    void logDebugMessageTruncated(String msg, int maxSize) {
        if (log.isTraceEnabled() || msg.length() < maxSize) {
            // in trace mode we output the full message
            log.debug(msg);
        } else {
            log.debug(msg.substring(0, maxSize) + "...");
        }
    }

    /** Dispatches a delete command to the recursive or non-recursive implementation. */
    void processDeleteCommand(IndexingCommand cmd) {
        if (cmd.isRecurse()) {
            processDeleteCommandRecursive(cmd);
        } else {
            processDeleteCommandNonRecursive(cmd);
        }
    }

    /** Deletes the single Elasticsearch document targeted by the command. */
    void processDeleteCommandNonRecursive(IndexingCommand cmd) {
        String indexName = esa.getIndexNameForRepository(cmd.getRepositoryName());
        DeleteRequestBuilder request = esa.getClient().prepareDelete(indexName, DOC_TYPE, cmd.getTargetDocumentId());
        if (log.isDebugEnabled()) {
            log.debug(String.format("Delete request: curl -XDELETE 'http://localhost:9200/%s/%s/%s'", indexName,
                    DOC_TYPE, cmd.getTargetDocumentId()));
        }
        request.execute().actionGet();
    }

    /**
     * Deletes the targeted document's children using a scroll query on the indexed
     * path, issuing bulk delete requests per scroll page.
     */
    void processDeleteCommandRecursive(IndexingCommand cmd) {
        String indexName = esa.getIndexNameForRepository(cmd.getRepositoryName());
        // we don't want to rely on target document because the document can be
        // already removed
        String docPath = getPathOfDocFromEs(cmd.getRepositoryName(), cmd.getTargetDocumentId());
        if (docPath == null) {
            if (!Framework.isTestModeSet()) {
                log.warn("Trying to delete a non existing doc: " + cmd.toString());
            }
            return;
        }
        // Refresh index before bulk delete
        esa.getClient().admin().indices().prepareRefresh(indexName).get();
        // Run the scroll query
        QueryBuilder query = QueryBuilders.constantScoreQuery(QueryBuilders.termQuery(CHILDREN_FIELD, docPath));
        TimeValue keepAlive = TimeValue.timeValueMinutes(1);
        SearchRequestBuilder request = esa.getClient()
                .prepareSearch(indexName)
                .setTypes(DOC_TYPE)
                .setScroll(keepAlive)
                .setSize(100)
                .setFetchSource(false)
                .setQuery(query);
        if (log.isDebugEnabled()) {
            log.debug(String.format(
                    "Search with scroll request: curl -XGET 'http://localhost:9200/%s/%s/_search?scroll=%s' -d '%s'",
                    indexName, DOC_TYPE, keepAlive, query.toString()));
        }
        for (SearchResponse response = request.execute().actionGet(); //
                response.getHits().getHits().length > 0; //
                response = runNextScroll(response, keepAlive)) {
            // Build bulk delete request
            BulkRequestBuilder bulkBuilder = esa.getClient().prepareBulk();
            for (SearchHit hit : response.getHits().getHits()) {
                bulkBuilder.add(esa.getClient().prepareDelete(hit.getIndex(), hit.getType(), hit.getId()));
            }
            if (log.isDebugEnabled()) {
                log.debug(String.format("Bulk delete request on %s elements", bulkBuilder.numberOfActions()));
            }
            // Run bulk delete request
            bulkBuilder.execute().actionGet();
        }
    }

    /** Fetches the next page of a scroll, keeping the scroll context alive. */
    SearchResponse runNextScroll(SearchResponse response, TimeValue keepAlive) {
        if (log.isDebugEnabled()) {
            log.debug(String.format(
                    "Scroll request: -XGET 'localhost:9200/_search/scroll' -d '{\"scroll\": \"%s\", \"scroll_id\": \"%s\" }'",
                    keepAlive, response.getScrollId()));
        }
        return esa.getClient().prepareSearchScroll(response.getScrollId()).setScroll(keepAlive).execute().actionGet();
    }

    /**
     * Return the ecm:path of an ES document or null if not found.
     */
    String getPathOfDocFromEs(String repository, String docId) {
        String indexName = esa.getIndexNameForRepository(repository);
        GetRequestBuilder getRequest = esa.getClient().prepareGet(indexName, DOC_TYPE, docId).setFields(PATH_FIELD);
        if (log.isDebugEnabled()) {
            log.debug(String.format("Get path of doc: curl -XGET 'http://localhost:9200/%s/%s/%s?fields=%s'", indexName,
                    DOC_TYPE, docId, PATH_FIELD));
        }
        GetResponse ret = getRequest.execute().actionGet();
        if (!ret.isExists() || ret.getField(PATH_FIELD) == null) {
            // doc not found
            return null;
        }
        return ret.getField(PATH_FIELD).getValue().toString();
    }

    /**
     * Return indexing request or null if the doc does not exists anymore.
     *
     * @throws java.lang.IllegalStateException if the command is not attached to a session
     */
    IndexRequestBuilder buildEsIndexingRequest(IndexingCommand cmd) {
        DocumentModel doc = cmd.getTargetDocument();
        if (doc == null) {
            return null;
        }
        try {
            JsonFactory factory = new JsonFactory();
            OutputStream out = new BytesStreamOutput();
            JsonGenerator jsonGen = factory.createJsonGenerator(out);
            jsonESDocumentWriter.writeESDocument(jsonGen, doc, cmd.getSchemas(), null);
            IndexRequestBuilder ret = esa.getClient()
                    .prepareIndex(esa.getIndexNameForRepository(cmd.getRepositoryName()), DOC_TYPE,
                            cmd.getTargetDocumentId())
                    .setSource(jsonBuilder(out));
            if (useExternalVersion && cmd.getOrder() > 0) {
                // external versioning lets Elasticsearch reject out-of-order updates
                ret.setVersionType(VersionType.EXTERNAL).setVersion(cmd.getOrder());
            }
            return ret;
        } catch (IOException e) {
            throw new NuxeoException("Unable to create index request for Document " + cmd.getTargetDocumentId(), e);
        }
    }
}