/* * JBoss, Home of Professional Open Source * Copyright 2012 Red Hat Inc. and/or its affiliates and other contributors * as indicated by the @authors tag. All rights reserved. */ package org.jboss.elasticsearch.river.remote; import java.io.IOException; import java.util.Date; import java.util.Map; import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.search.SearchHit; import org.jboss.elasticsearch.river.remote.exception.RemoteDocumentNotFoundException; /** * Base abstract class for indexers used to run one index update process for one Space. * <p> * Can be used only for one run, then must be discarded and new instance created! * * @author Vlastimil Elias (velias at redhat dot com) */ public abstract class SpaceIndexerBase implements Runnable { public static final String KEY_DETAIL = "detail"; protected ESLogger logger; protected final IRemoteSystemClient remoteSystemClient; protected final IESIntegration esIntegrationComponent; /** * Configured document index structure builder to be used. */ protected final IDocumentIndexStructureBuilder documentIndexStructureBuilder; /** * Key of Space updated by this indexer. */ protected final String spaceKey; /** * Time when indexing started. */ protected long startTime = 0; /** * Info about current indexing. */ protected SpaceIndexingInfo indexingInfo; /** * Create and configure indexer. * * @param spaceKey to be indexed by this indexer. * @param remoteSystemClient configured client to be used to obtain informations from remote system. * @param esIntegrationComponent to be used to call River component and ElasticSearch functions * @param documentIndexStructureBuilder to be used during indexing */ public SpaceIndexerBase(String spaceKey, IRemoteSystemClient remoteSystemClient, IESIntegration esIntegrationComponent, IDocumentIndexStructureBuilder documentIndexStructureBuilder) { if (Utils.isEmpty(spaceKey)) throw new IllegalArgumentException("spaceKey must be defined"); this.remoteSystemClient = remoteSystemClient; this.spaceKey = spaceKey; this.esIntegrationComponent = esIntegrationComponent; this.documentIndexStructureBuilder = documentIndexStructureBuilder; } @Override public void run() { startTime = System.currentTimeMillis(); indexingInfo.startDate = new Date(startTime); try { processUpdate(); processDelete(new Date(startTime)); indexingInfo.timeElapsed = (System.currentTimeMillis() - startTime); indexingInfo.finishedOK = true; esIntegrationComponent.reportIndexingFinished(indexingInfo); logger.info("Finished {} update for Space {}. {} updated and {} deleted documents. Time elapsed {}s.", indexingInfo.fullUpdate ? "full" : "incremental", spaceKey, indexingInfo.documentsUpdated, indexingInfo.documentsDeleted, (indexingInfo.timeElapsed / 1000)); if (indexingInfo.getErrorMessage() != null) { logger .info( "Update for Space {} contained {} documents with skipped unfatal errors: " + indexingInfo.getErrorMessage(), spaceKey, indexingInfo.documentsWithError); } } catch (Throwable e) { indexingInfo.timeElapsed = (System.currentTimeMillis() - startTime); indexingInfo.addErrorMessage(e.getMessage()); indexingInfo.finishedOK = false; esIntegrationComponent.reportIndexingFinished(indexingInfo); Throwable cause = e; // do not log stacktrace for some operational exceptions to keep log file much clear if (((cause instanceof IOException) || (cause instanceof InterruptedException)) && cause.getMessage() != null) cause = null; logger.error("Failed {} update for Space {} due: {}", cause, indexingInfo.fullUpdate ? "full" : "incremental", spaceKey, e.getMessage()); } } /** * Process update of search index for configured Space. A {@link #indexingInfo.updatedCount} field is updated inside of this * method. A {@link #indexingInfo.fullUpdate} field can be updated inside of this method also. * * @throws Exception */ protected abstract void processUpdate() throws Exception; /** * Get document detail from remote system if configured, place it under <code>detail</code> key in data. * {@link IRemoteSystemClient#getChangedDocumentDetails(String, String, Map)} is used inside. * * @param documentId of document to get * @param document structure to get details for and place them into * @return true if document is found correctly, false if not found in remote system * @throws Exception in case of runtime problem */ protected boolean getDocumentDetail(String documentId, Map<String, Object> document) throws Exception { try { Object detail = remoteSystemClient.getChangedDocumentDetails(spaceKey, documentId, document); if (detail != null) { document.put(KEY_DETAIL, detail); } return true; } catch (RemoteDocumentNotFoundException e) { // skip rest of processing in this case String msg = "Detail processing problem for document with id ' documentId', so we skip it: " + e.getMessage(); indexingInfo.addErrorMessage(msg); indexingInfo.documentsWithError++; logger.warn(msg); return false; } } /** * Get document id from document. Throw exception if not there. * * @param document to get id from * @return document id * @throws IllegalArgumentException if document id is not found in document */ protected String getDocumentIdChecked(Map<String, Object> document) { String documentId = documentIndexStructureBuilder.extractDocumentId(document); if (Utils.isEmpty(documentId)) { throw new IllegalArgumentException("Document ID not found in remote system response for Space " + spaceKey + " within data: " + document); } return documentId; } protected void executeBulkUpdate(BulkRequestBuilder esBulk) { try { esIntegrationComponent.executeESBulkRequest(esBulk); } catch (BulkUpdatePartialFailureException e) { indexingInfo.addErrorMessage(e.getMessage()); indexingInfo.documentsWithError += e.getNumOfFailures(); indexingInfo.documentsUpdated -= e.getNumOfFailures(); } } /** * Process delete of documents from search index for configured Space. A {@link #deleteCount} field is updated inside * of this method. * * @param boundDate date when full update was started. We delete all search index documents not updated after this * date (which means these documents are not in remote system anymore). */ protected void processDelete(Date boundDate) throws Exception { if (boundDate == null) throw new IllegalArgumentException("boundDate must be set"); if (!indexingInfo.fullUpdate) return; logger.debug("Go to process remote system deletes for Space {} for documents not updated in index after {}", spaceKey, boundDate); String indexName = documentIndexStructureBuilder.getDocumentSearchIndexName(spaceKey); esIntegrationComponent.refreshSearchIndex(indexName); logger.debug("go to delete indexed documents for space {} not updated after {}", spaceKey, boundDate); SearchRequestBuilder srb = esIntegrationComponent.prepareESScrollSearchRequestBuilder(indexName); documentIndexStructureBuilder.buildSearchForIndexedDocumentsNotUpdatedAfter(srb, spaceKey, boundDate); SearchResponse scrollResp = esIntegrationComponent.executeESSearchRequest(srb); if (scrollResp.getHits().getTotalHits() > 0) { if (isClosed()) throw new InterruptedException("Interrupted because River is closed"); scrollResp = esIntegrationComponent.executeESScrollSearchNextRequest(scrollResp); BulkRequestBuilder esBulk = esIntegrationComponent.prepareESBulkRequestBuilder(); while (scrollResp.getHits().getHits().length > 0) { for (SearchHit hit : scrollResp.getHits()) { logger.debug("Go to delete indexed document for ES document id {}", hit.getId()); if (documentIndexStructureBuilder.deleteESDocument(esBulk, hit)) { indexingInfo.documentsDeleted++; } else { indexingInfo.commentsDeleted++; } } if (isClosed()) throw new InterruptedException("Interrupted because River is closed"); scrollResp = esIntegrationComponent.executeESScrollSearchNextRequest(scrollResp); } esIntegrationComponent.executeESBulkRequest(esBulk); } } /** * Prepare delete of es index documents based on remote document id. * * @param esBulk to prepare delete into * @param documentId to prepare delete for * @return true if at least one delete has been prepared in the method * @throws InterruptedException * @throws Exception */ protected boolean prepareDeleteByRemoteDocumentId(BulkRequestBuilder esBulk, String documentId) throws InterruptedException, Exception { boolean deletedInThisBulk = false; String indexName = documentIndexStructureBuilder.getDocumentSearchIndexName(spaceKey); esIntegrationComponent.refreshSearchIndex(indexName); logger.debug("go to delete indexed documents for space {} and remote id {}", spaceKey, documentId); SearchRequestBuilder srb = esIntegrationComponent.prepareESScrollSearchRequestBuilder(indexName); documentIndexStructureBuilder.buildSearchForIndexedDocumentsWithRemoteId(srb, spaceKey, documentId); SearchResponse scrollResp = esIntegrationComponent.executeESSearchRequest(srb); if (scrollResp.getHits().getTotalHits() > 0) { if (isClosed()) throw new InterruptedException("Interrupted because River is closed"); scrollResp = esIntegrationComponent.executeESScrollSearchNextRequest(scrollResp); while (scrollResp.getHits().getHits().length > 0) { for (SearchHit hit : scrollResp.getHits()) { logger.debug("Go to delete indexed document for ES document id {}", hit.getId()); if (documentIndexStructureBuilder.deleteESDocument(esBulk, hit)) { indexingInfo.documentsDeleted++; } else { indexingInfo.commentsDeleted++; } deletedInThisBulk = true; } scrollResp = esIntegrationComponent.executeESScrollSearchNextRequest(scrollResp); } } return deletedInThisBulk; } /** * Check if we must interrupt update process because ElasticSearch runtime needs it. * * @return true if we must interrupt update process */ protected boolean isClosed() { return esIntegrationComponent != null && esIntegrationComponent.isClosed(); } /** * Get current indexing info. * * @return indexing info instance. */ public SpaceIndexingInfo getIndexingInfo() { return indexingInfo; } }